From 7abece5f7883640a821f7dcfa42af1282c078404 Mon Sep 17 00:00:00 2001 From: Keith Badger Date: Mon, 8 Dec 2025 16:15:43 -0500 Subject: [PATCH 1/3] added stochastic matricies and proofs that distributions multiplied by them on right is distribution --- Probability/Probability/Matrix.lean | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 Probability/Probability/Matrix.lean diff --git a/Probability/Probability/Matrix.lean b/Probability/Probability/Matrix.lean new file mode 100644 index 0000000..8471e8e --- /dev/null +++ b/Probability/Probability/Matrix.lean @@ -0,0 +1,29 @@ +import Probability.Probability.Prelude + +import Probability.Probability.Defs +import Mathlib.Data.Matrix.Mul +import Mathlib.LinearAlgebra.Matrix.DotProduct + +namespace Matrix + +structure ProbabilityMatrix (n : ℕ) : Type where + --(n x n) Matrix where each row is a probability distribution + P : (Matrix (Fin n) (Fin n) ℚ) + row_sum : P *ᵥ 1 = 1 + nneg : ∀ i j : Fin n, P i j ≥ 0 + +variable (n : ℕ) (Prob : ProbabilityMatrix n) (μ : Findist n) + + +theorem dist_prob_product_nneg : μ.p ᵥ* (Prob.P) ≥ 0 := by + unfold vecMul + intro j + apply dotProduct_nonneg_of_nonneg + exact μ.nneg + exact fun i => Prob.nneg i j + +theorem dist_prob_product_sum : μ.p ᵥ* (Prob.P) ⬝ᵥ 1 = 1 := by + rw [← dotProduct_mulVec] + calc μ.p ⬝ᵥ Prob.P *ᵥ 1 = μ.p ⬝ᵥ 1 := by rw[Prob.row_sum] + _ = 1 ⬝ᵥ μ.p := by rw[dotProduct_comm] + _ = 1 := by rw[μ.prob] From 265bd82901fe12c985d511cb2f1b46c936bc0aa9 Mon Sep 17 00:00:00 2001 From: Keith Badger Date: Mon, 8 Dec 2025 23:17:58 -0500 Subject: [PATCH 2/3] added discounted markov reward process --- Probability/Probability/Matrix.lean | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/Probability/Probability/Matrix.lean b/Probability/Probability/Matrix.lean index 8471e8e..2d2b05b 100644 --- a/Probability/Probability/Matrix.lean +++ b/Probability/Probability/Matrix.lean @@ -6,13 +6,15 @@ import Mathlib.LinearAlgebra.Matrix.DotProduct namespace Matrix +section ProbabilityMatrix + structure ProbabilityMatrix (n : ℕ) : Type where --(n x n) Matrix where each row is a probability distribution P : (Matrix (Fin n) (Fin n) ℚ) row_sum : P *ᵥ 1 = 1 nneg : ∀ i j : Fin n, P i j ≥ 0 -variable (n : ℕ) (Prob : ProbabilityMatrix n) (μ : Findist n) +variable (n : ℕ) (Prob : ProbabilityMatrix n) (μ : Findist n) (r : Fin n → ℚ) (γ : ℚ) theorem dist_prob_product_nneg : μ.p ᵥ* (Prob.P) ≥ 0 := by @@ -27,3 +29,25 @@ theorem dist_prob_product_sum : μ.p ᵥ* (Prob.P) ⬝ᵥ 1 = 1 := by calc μ.p ⬝ᵥ Prob.P *ᵥ 1 = μ.p ⬝ᵥ 1 := by rw[Prob.row_sum] _ = 1 ⬝ᵥ μ.p := by rw[dotProduct_comm] _ = 1 := by rw[μ.prob] + +end ProbabilityMatrix + +section RewardProcess + +--Discounted Markov Reward Process Definition +structure DMRP (n : ℕ) : Type where + r : Fin n → ℚ --rewards + Prob : ProbabilityMatrix n --transitions + γ : ℚ --discount + discount_in_range : 0 ≤ γ ∧ γ < 1 + +variable (n : ℕ) (Proc : DMRP n) (u : (Fin n) → ℚ) (v : (Fin n) → ℚ) + +def bellman_backup (v : Fin n → ℚ) : Fin n → ℚ := Proc.r + Proc.γ • Proc.Prob.P *ᵥ v + +notation "𝔹["v "//" Proc "]" => bellman_backup Proc v + +-- Looking for norm in mathlib +theorem bellman_backup_contraction : 1 = 1 := by sorry + +end RewardProcess From 7468c145de9dcec8c933f17316b81a74e56f4824 Mon Sep 17 00:00:00 2001 From: Keith Badger Date: Tue, 9 Dec 2025 00:17:09 -0500 Subject: [PATCH 3/3] added probability matrix proof to blueprint --- blueprint/src/content.tex | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/blueprint/src/content.tex b/blueprint/src/content.tex index e312163..518d105 100644 --- a/blueprint/src/content.tex +++ b/blueprint/src/content.tex @@ -5,7 +5,6 @@ % If you want to split the blueprint content into several files then % the current file can be a simple sequence of \input. Otherwise It % can start with a \section or \chapter for instance. - \newcommand{\id}{\operatorname{id}} @@ -1290,6 +1289,31 @@ \subsection{Evaluation} %%\lean{MDPs.v_dp_π}\leanok \end{definition} +\section{Probability Maticies} + + +\begin{definition} +A probability matrix $\bold{P}$ is defined as an $n \times n$ matrix with the properties: +\begin{align*} + &\bold{P}_{ij} \geq 0 \quad \forall i,j \in \{1,2,\dots,n\} \\ + &\bold{P}\bold{1} = \bold{1} +\end{align*} +\end{definition} +\begin{theorem} +Let $\bold{d} \in \Delta^n$. Then +\[ +\bold{d}^{T}\bold{P} \in \Delta^n +\] +\end{theorem} +\begin{proof} +Let $i \in \{1,2,\dots,n\}$ and $\bold{p} = \bold{d}^{T}\bold{P} $. Then $p_i = \bold{d}^{T}\bold{P}_i$ where $\bold{P}_i$ is the $i^{th}$ column of $\bold{P}$. Since all the elements of both $\bold{P}_i$ and $\bold{d}$ are non-negative, their inner product must also be non-negative. Since +\begin{align*} + \bold{d}^{T}\bold{P}\bold{1} &= \bold{d}^{T}\bold{1} \\ + &= 1~, +\end{align*} +$\bold{p}$ satisfies that its elements are non-negative and sum to 1 making it a member of $\Delta^n$ +\end{proof} + %%% Local Variables: %%% coding: utf-8 %%% mode: LaTeX