From 6fa5c4ec87fb781891221c389b0f8d28e4bb4472 Mon Sep 17 00:00:00 2001
From: Pierrick Philippe <pierrick.philippe@irisa.fr>
Date: Wed, 3 Jul 2024 16:11:22 +0200
Subject: [PATCH] Fixed (hopefully): Crash when looking for rvalue of calling
 frame variable

---
 scripts/cmake.sh                |  15 ++-
 scripts/copy_analyzer_header.sh |   9 +-
 scripts/update-alternatives.sh  |   6 +-
 src/crypto_taint_sm.cc          |  89 ++++++++++-------
 src/is_tainted_diagnostic.cc    | 164 ++++++++++++++++++++++++++++++++
 src/utils.cc                    |  16 +++-
 6 files changed, 258 insertions(+), 41 deletions(-)
 create mode 100644 src/is_tainted_diagnostic.cc

diff --git a/scripts/cmake.sh b/scripts/cmake.sh
index 1e13ae6..00789f2 100644
--- a/scripts/cmake.sh
+++ b/scripts/cmake.sh
@@ -1,4 +1,9 @@
 #!/bin/bash
+
+usage(){
+    echo "$0 <GCC_VERSION> [GENERATOR=Ninja]"
+}
+
 set -x
 
 if [ ! -d "./build" ]
@@ -8,9 +13,15 @@ fi
 
 if [ $# -eq 1 ]
 then
-    GENERATOR=$1
-else
+    GCC_VERSION=$1
     GENERATOR="Ninja"
+elif [ $# -eq 2 ]
+then
+    GCC_VERSION=$1
+    GENERATOR=$2
+else
+    usage
+    exit 1
 fi
 
 cd ./build
diff --git a/scripts/copy_analyzer_header.sh b/scripts/copy_analyzer_header.sh
index 6541de3..04c50f5 100644
--- a/scripts/copy_analyzer_header.sh
+++ b/scripts/copy_analyzer_header.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 usage() {
-    echo "$0 {trunk,custom}"
+    echo "$0 {trunk,custom,dev} [PATH_TO_GCC_SRC=$HOME/soft/gcc/src/gcc]"
 }
 
 weird() {
@@ -34,6 +34,13 @@ then
     fi
     cp $GCC_REPO/analyzer/*.h $DEST
 elif [ $1 = "custom" -a $BASE_VER = "13.0.1_custom" ]
+then
+    if [ ! -d $DEST ]
+    then
+        mkdir $DEST
+    fi
+    cp $GCC_REPO/analyzer/*.h $DEST
+elif [ $1 = "dev" -a $BASE_VER = "13.0.1_custom_dev" ]
 then
     if [ ! -d $DEST ]
     then
diff --git a/scripts/update-alternatives.sh b/scripts/update-alternatives.sh
index 3627ffa..2691051 100644
--- a/scripts/update-alternatives.sh
+++ b/scripts/update-alternatives.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 usage() {
-    echo "$0 {trunk,custom}"
+    echo "$0 {trunk,custom,dev}"
 }
 
 set -x
@@ -19,6 +19,10 @@ elif [ $1 = "custom" ]
 then
     sudo update-alternatives --set gcc $HOME/.local/bin/gcc_modified
     sudo update-alternatives --set g++ $HOME/.local/bin/g++_modified
+elif [ $1 = "dev" ]
+then
+    sudo update-alternatives --set gcc $HOME/.local/bin/gcc_dev
+    sudo update-alternatives --set g++ $HOME/.local/bin/g++_dev
 else
     usage
     exit 1
diff --git a/src/crypto_taint_sm.cc b/src/crypto_taint_sm.cc
index 3c4efd6..05ba04c 100644
--- a/src/crypto_taint_sm.cc
+++ b/src/crypto_taint_sm.cc
@@ -182,7 +182,7 @@ namespace crypto_taint {
     bool crypto_taint_state_machine::can_purge_p (state_machine::state_t state) const {
         LOG_SCOPE(this->get_logger());
         this->log("state = %s", state->get_name());
-        return false;
+        return state != this->m_start;
     }
 
     void crypto_taint_state_machine::on_pop_frame (sm_state_map *smap, const frame_region *frame_reg, tree result_lvalue, const gimple *call, sm_context *sm_ctx, const supernode *node) const {
@@ -213,40 +213,64 @@ namespace crypto_taint {
                     decl_reg->dump_to_pp(pp, false);
                     logger->end_log_line();
                 }
-                if (handle_result) {
-                    if (tree decl = reg->maybe_get_decl()) {
-                        imm_use_iterator imm_iter;
-                        use_operand_p use_p;
-                        FOR_EACH_IMM_USE_FAST (use_p, imm_iter, decl) {
-                            gimple * use_stmt = USE_STMT(use_p);
-                            if (is_a<greturn *>(use_stmt)) {
+                if (tree decl = reg->maybe_get_decl()) {
+                    imm_use_iterator imm_iter;
+                    use_operand_p use_p;
+                    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, decl) {
+                        gimple * use_stmt = USE_STMT(use_p);
+                        if (is_a<greturn *>(use_stmt) && handle_result) {
+                            if (logger) {
+                                logger->start_log_line();
+                                logger->log_partial("Return stmt found in uses for ");
+                                dump_quoted_tree(logger->get_printer(), decl);
+                                logger->end_log_line();
+                            }
+                            if (entry.m_state == this->m_tainted) {
                                 if (logger) {
+                                    auto pp = logger->get_printer();
                                     logger->start_log_line();
-                                    logger->log_partial("Return stmt found in uses for ");
-                                    dump_quoted_tree(logger->get_printer(), decl);
+                                    logger->log_partial("Returned tree ");
+                                    dump_quoted_tree(pp, decl);
+                                    logger->log_partial(" is tainted, tainting caller's lvalue (");
+                                    dump_quoted_tree(pp, result_lvalue);
+                                    pp_right_paren(pp);
                                     logger->end_log_line();
                                 }
-                                if (entry.m_state == this->m_tainted) {
-                                    if (logger) {
-                                        auto pp = logger->get_printer();
-                                        logger->start_log_line();
-                                        logger->log_partial("Returned tree ");
-                                        dump_quoted_tree(pp, decl);
-                                        logger->log_partial(" is tainted, tainting caller's lvalue (");
-                                        dump_quoted_tree(pp, result_lvalue);
-                                        pp_right_paren(pp);
-                                        logger->end_log_line();
-                                    }
-                                }
-                                auto caller_reg = frame_reg->get_calling_frame()->get_region_for_local(sm_ctx->get_new_program_state()->m_region_model->get_manager(), result_lvalue, nullptr);
-                                sm_ctx->on_transition(node, call, caller_reg, this->m_start, this->m_tainted, reg);
                             }
+                            auto caller_reg = frame_reg->get_calling_frame()->get_region_for_local(sm_ctx->get_new_program_state()->m_region_model->get_manager(), result_lvalue, nullptr);
+                            sm_ctx->on_transition(node, call, caller_reg, this->m_start, this->m_tainted, reg);
                         }
+                        // if (is_a<gassign *>(use_stmt)) {
+                        //     if (logger) {
+                        //         logger->start_log_line();
+                        //         logger->log_partial("Found an assign stmt for ");
+                        //         dump_quoted_tree(logger->get_printer(), decl);
+                        //         logger->log_partial(" | stmt:");
+                        //         logger->end_log_line();
+                        //         print_gimple_stmt(logger->get_file(), const_cast<gimple*>(use_stmt), logger->get_printer()->indent_skip, TDF_VOPS|TDF_MEMSYMS);
+                        //     }
+                        //     auto lhs = gimple_assign_lhs(use_stmt);
+                        //     if (TREE_CODE(lhs) == MEM_REF)
+                        //         lhs = TREE_OPERAND(lhs, 0);
+                        //     if (this->is_tainted(sm_ctx, use_stmt, lhs)) {
+                        //         if (logger) {
+                        //             logger->start_log_line();
+                        //             logger->log_partial("lhs tainted on stmt: ");
+                        //             dump_quoted_tree(logger->get_printer(), lhs);
+                        //             logger->end_log_line();
+                        //         }
+                        //     }
+                        // }
                     }
                 }
                 if (decl_reg->get_parent_region() == frame_reg) {
-                    if (logger)
-                        logger->log("Clearing any state...");
+                    if (logger) {
+                        auto pp = logger->get_printer();
+                        logger->start_log_line();
+                        logger->log_partial("Clearing any state for: ");
+                        decl_reg->dump_to_pp(pp, false);
+                        logger->end_log_line();
+                    }
                     smap->clear_any_state(reg);
                 }
             }
@@ -542,16 +566,15 @@ namespace crypto_taint {
                         }
                         if (this->is_tainted(sm_ctx, stmt, arg1)) {
                             utils::dump_taint(this->get_logger(), sm_ctx, any_pointer_p(lhs), lhs, arg1);
-                            // if (reg)
-                            //     sm_ctx->on_transition(node, stmt, reg, this->m_start, this->m_tainted, sm_ctx->get_diagnostic_tree(arg1));
-                            // else
-                            sm_ctx->on_transition(node, stmt, lhs, this->m_start, this->m_tainted, 
-                                sm_ctx->get_diagnostic_tree(arg1), !any_pointer_p(lhs));
+                            sm_ctx->on_transition(node, stmt, lhs,
+                                this->m_start, this->m_tainted, 
+                                sm_ctx->get_diagnostic_tree(arg1),
+                                !any_pointer_p(lhs));
                             if (lhs_is_array_or_ptr_expr || lhs_is_mem_ref) {
                                 if (tree converted = utils::array::convert_view(sm_ctx, lhs, this->get_logger())) {
-                                    utils::dump_taint(this->get_logger(), sm_ctx, any_pointer_p(converted), converted, lhs);
+                                    utils::dump_taint(this->get_logger(), sm_ctx, any_pointer_p(converted), converted, arg1);
                                     sm_ctx->on_transition(node, stmt, converted, this->m_start,
-                                        this->m_tainted, lhs, !any_pointer_p(converted));
+                                        this->m_tainted, arg1, !any_pointer_p(converted));
                                 }
                             }
                             ret = true;
diff --git a/src/is_tainted_diagnostic.cc b/src/is_tainted_diagnostic.cc
new file mode 100644
index 0000000..2223d02
--- /dev/null
+++ b/src/is_tainted_diagnostic.cc
@@ -0,0 +1,164 @@
+#include <memory>
+#include <string>
+
+#include <gcc-plugin.h>
+#include <tree.h>
+#include <json.h>
+#include <diagnostic.h>
+#include <diagnostic-event-id.h>
+#include <gimple.h>
+#include <analyzer/analyzer.h>
+#include <analyzer/analyzer-logging.h>
+#include <analyzer/sm.h>
+#include <analyzer/pending-diagnostic.h>
+#include <analyzer/diagnostic-manager.h>
+
+// Pull in namespace ana first, to avoid errors in the analyzer headers included below.
+using namespace ana;
+
+#include <analyzer/store.h>
+#include <analyzer/region.h>
+#include <analyzer/checker-event.h>
+
+#include "crypto_taint_sm.h"
+#include "is_tainted_diagnostic.h"
+#include "utils.h"
+
+namespace ana {
+
+    static std::string dump_state_change(const evdesc::state_change &ev) {
+        /*
+        Builds a one-line text dump of a state change and returns it.
+        Fields of evdesc::state_change:
+          tree m_expr, m_origin;                            -> dumped by name, or "<anon>"
+          state_machine::state_t m_old_state, m_new_state;  -> dumped via get_name()
+          diagnostic_event_id_t m_event_id;                 -> not dumped
+          const state_change_event &m_event;                -> not dumped
+        */
+        char buf[50] = { 0 }; // scratch buffer, reused for both names
+        std::string res ("state_change: { m_expr: ");
+        if (crypto_taint::utils::get_name(ev.m_expr, buf, 50) == 1) // buf is only used when get_name returns 1
+            res.append(buf);
+        else
+            res.append("<anon>");
+
+        res.append(", m_origin: ");
+        if (crypto_taint::utils::get_name(ev.m_origin, buf, 50) == 1)
+            res.append(buf);
+        else
+            res.append("<anon>");
+        res.append(", m_old_state: ");
+        res.append(ev.m_old_state->get_name());
+        res.append(", m_new_state: ");
+        res.append(ev.m_new_state->get_name());
+        res.append("}");
+
+        return res;
+    }
+
+    static std::string dump_final_event(const evdesc::final_event &ev) {
+        /*
+        Builds a one-line text dump of a final event and returns it.
+        Fields of evdesc::final_event: tree m_expr (dumped by name, or "<anon>"),
+        state_machine::state_t m_state (dumped via get_name()), const warning_event &m_event (cannot be dumped).
+        */
+        char buf[50] = { 0 }; // scratch buffer for the expression name
+        std::string res ("final_event: { m_expr: ");
+        if (crypto_taint::utils::get_name(ev.m_expr, buf, 50) == 1) // buf is only used when get_name returns 1
+            res.append(buf);
+        else
+            res.append("<anon>");
+
+        res.append(", m_state: ");
+        res.append(ev.m_state->get_name());
+        res.append("}");
+
+        return res;
+    }
+
+    // BEGIN is_tainted_diagnostic impl
+
+    is_tainted_diagnostic::is_tainted_diagnostic(const crypto_taint::crypto_taint_state_machine& sm, tree src)
+        : m_sm(sm), m_src(src) // src is the tree this diagnostic is about; subclass_equal_p compares it
+    {}
+
+    bool is_tainted_diagnostic::subclass_equal_p(const pending_diagnostic &other) const { // equal when same_tree_p holds for both m_src
+        return same_tree_p(this->m_src, static_cast<const is_tainted_diagnostic &>(other).m_src); // unchecked downcast, as before; const is kept
+    }
+
+    label_text is_tainted_diagnostic::describe_state_change (const evdesc::state_change &change) {
+        // Returns a label only for transitions into m_tainted (an empty label_text otherwise). TODO: simplify the branching below.
+        // fnotice(stderr, "%s\n", dump_state_change(change).data());
+        if (change.m_new_state == this->m_sm.m_tainted) {
+            auto stmt = change.m_event.m_stmt;
+            // Non-pointer expression on an assignment: name the lhs itself when it is a COMPONENT_REF, i.e. a struct access
+            if (change.m_expr && !any_pointer_p(change.m_expr) && stmt && gimple_code(stmt) == GIMPLE_ASSIGN) {
+                if (tree lhs = gimple_assign_lhs(stmt)) {
+                    if (TREE_CODE(lhs) == COMPONENT_REF) {
+                        if (change.m_origin)
+                            return change.formatted_print("%qE is tainted here because of %qE", 
+                                lhs, change.m_origin);
+                        else
+                            return change.formatted_print("%qE is tainted here", lhs);
+                    }
+                    else if (change.m_origin)
+                        return change.formatted_print("%qE gets tainted here because of %qE", change.m_expr, change.m_origin);
+                    // No origin recorded: print the expression alone
+                    return change.formatted_print("%qE gets tainted here", change.m_expr);
+                }
+            }
+            // Otherwise, on an assignment, take the lhs directly from the stmt
+            else if (stmt && gimple_code(stmt) == GIMPLE_ASSIGN) {
+                auto var = gimple_assign_lhs(stmt);
+                if (var && change.m_origin) {
+                    if (any_pointer_p(var))
+                        return change.formatted_print("%qE points-to tainted data %qE here", var, change.m_origin);
+                    else
+                        return change.formatted_print("%qE gets tainted here because of %qE", change.m_expr, change.m_origin); // NOTE(review): m_expr may be null on this path (the first `if` also fails on a null m_expr) — confirm %qE copes
+                }
+                else if (var) return change.formatted_print("%qE points-to tainted data", var);
+            }
+            else if (change.m_origin)
+                return change.formatted_print("%qE gets tainted here because of %qE", change.m_expr, change.m_origin);
+            // No origin recorded (or nothing above returned): print the expression alone
+            return change.formatted_print("%qE gets tainted here", change.m_expr);
+        }
+        return label_text();
+    }
+
+    // END is_tainted_diagnostic impl
+
+    // BEGIN taint_diagnostic impl
+
+    taint_diagnostic::taint_diagnostic(const crypto_taint::crypto_taint_state_machine &sm, tree src)
+        : is_tainted_diagnostic(sm, src) // both arguments are forwarded unchanged to the base class
+    {}
+
+    const char *taint_diagnostic::get_kind() const { // fixed kind name for this diagnostic class
+        return "taint_diagnostic";
+    }
+
+    int taint_diagnostic::get_controlling_option() const { // value is passed to warning_at() by emit()
+        return 1; // NOTE(review): hard-coded 1 rather than a named option constant — confirm which option this is meant to select
+    }
+
+    // Label for the final event: in the tainted state, words m_src as a field, element, pointer or plain l-value; otherwise empty.
+    label_text taint_diagnostic::describe_final_event(const evdesc::final_event &ev) {
+        if (ev.m_state == this->m_sm.m_tainted) {
+            if (TREE_CODE(this->m_src) == COMPONENT_REF)
+                return ev.formatted_print("field %qE is tainted", this->m_src);
+            else if (TREE_CODE(this->m_src) == ARRAY_REF)
+                return ev.formatted_print("element %qE is tainted", this->m_src);
+            else if (any_pointer_p(this->m_src))
+                return ev.formatted_print("pointer %qE is tainted", this->m_src);
+            else return ev.formatted_print("l-value %qE is tainted", this->m_src);
+        }
+        return label_text();
+    }
+
+    bool taint_diagnostic::emit(rich_location * loc) { // reports m_src as tainted at loc; returns what warning_at returns
+        return warning_at(loc, this->get_controlling_option(), "%qE is tainted", this->m_src);
+    }
+
+    // END taint_diagnostic impl
+}
\ No newline at end of file
diff --git a/src/utils.cc b/src/utils.cc
index 43546de..4feb110 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -530,14 +530,21 @@ namespace array {
 
         tree res = NULL_TREE;
 
-        if (logger)
-            LOG_SCOPE(logger);
+        LOG_SCOPE(logger);
 
         auto code = TREE_CODE(t);
         auto model = sm_ctx->get_new_program_state()->m_region_model;
         const region *reg = nullptr;
         const svalue *sval = nullptr;
 
+        if (logger) {
+            auto pp = logger->get_printer();
+            logger->start_log_line();
+            logger->log_partial("t: ");
+            dump_quoted_tree(pp, t);
+            logger->end_log_line();
+        }
+
         switch (code) {
             case ARRAY_REF:
                 reg = model->get_lvalue(t, nullptr);
@@ -546,8 +553,9 @@ namespace array {
                 break;
             case POINTER_PLUS_EXPR:
                 sval = model->get_rvalue(t, nullptr);
-                if (TREE_CODE(sval->get_type()) == ARRAY_TYPE)
-                    reg = offset_to_elm(sm_ctx, sval, nullptr);
+                if (TREE_CODE(sval->get_type()) == ARRAY_TYPE
+                    || TREE_CODE(sval->get_type()) == POINTER_TYPE)
+                    reg = offset_to_elm(sm_ctx, sval, logger);
                 res = reg ? model->get_representative_tree(reg) : t;
                 break;
             default:
-- 
GitLab