diff --git a/Probability/Probability/Matrix.lean b/Probability/Probability/Matrix.lean new file mode 100644 index 0000000..2d2b05b --- /dev/null +++ b/Probability/Probability/Matrix.lean @@ -0,0 +1,53 @@ +import Probability.Probability.Prelude + +import Probability.Probability.Defs +import Mathlib.Data.Matrix.Mul +import Mathlib.LinearAlgebra.Matrix.DotProduct + +namespace Matrix + +section ProbabilityMatrix + +structure ProbabilityMatrix (n : ℕ) : Type where + --(n x n) Matrix where each row is a probability distribution + P : (Matrix (Fin n) (Fin n) ℚ) + row_sum : P *ᵥ 1 = 1 + nneg : ∀ i j : Fin n, P i j ≥ 0 + +variable (n : ℕ) (Prob : ProbabilityMatrix n) (μ : Findist n) (r : Fin n → ℚ) (γ : ℚ) + + +theorem dist_prob_product_nneg : μ.p ᵥ* (Prob.P) ≥ 0 := by + unfold vecMul + intro j + apply dotProduct_nonneg_of_nonneg + exact μ.nneg + exact fun i => Prob.nneg i j + +theorem dist_prob_product_sum : μ.p ᵥ* (Prob.P) ⬝ᵥ 1 = 1 := by + rw [← dotProduct_mulVec] + calc μ.p ⬝ᵥ Prob.P *ᵥ 1 = μ.p ⬝ᵥ 1 := by rw[Prob.row_sum] + _ = 1 ⬝ᵥ μ.p := by rw[dotProduct_comm] + _ = 1 := by rw[μ.prob] + +end ProbabilityMatrix + +section RewardProcess + +--Discounted Markov Reward Process Definition +structure DMRP (n : ℕ) : Type where + r : Fin n → ℚ --rewards + Prob : ProbabilityMatrix n --transitions + γ : ℚ --discount + discount_in_range : 0 ≤ γ ∧ γ < 1 + +variable (n : ℕ) (Proc : DMRP n) (u : (Fin n) → ℚ) (v : (Fin n) → ℚ) + +def bellman_backup (v : Fin n → ℚ) : Fin n → ℚ := Proc.r + Proc.γ • Proc.Prob.P *ᵥ v + +notation "𝔹["v "//" Proc "]" => bellman_backup Proc v + +-- Looking for norm in mathlib +theorem bellman_backup_contraction : 1 = 1 := by sorry + +end RewardProcess diff --git a/blueprint/src/content.tex b/blueprint/src/content.tex index e312163..518d105 100644 --- a/blueprint/src/content.tex +++ b/blueprint/src/content.tex @@ -5,7 +5,6 @@ % If you want to split the blueprint content into several files then % the current file can be a simple sequence of \input. Otherwise It % can start with a \section or \chapter for instance. - \newcommand{\id}{\operatorname{id}} @@ -1290,6 +1289,31 @@ \subsection{Evaluation} %%\lean{MDPs.v_dp_π}\leanok \end{definition} +\section{Probability Maticies} + + +\begin{definition} +A probability matrix $\bold{P}$ is defined as an $n \times n$ matrix with the properties: +\begin{align*} + &\bold{P}_{ij} \geq 0 \quad \forall i,j \in \{1,2,\dots,n\} \\ + &\bold{P}\bold{1} = \bold{1} +\end{align*} +\end{definition} +\begin{theorem} +Let $\bold{d} \in \Delta^n$. Then +\[ +\bold{d}^{T}\bold{P} \in \Delta^n +\] +\end{theorem} +\begin{proof} +Let $i \in \{1,2,\dots,n\}$ and $\bold{p} = \bold{d}^{T}\bold{P} $. Then $p_i = \bold{d}^{T}\bold{P}_i$ where $\bold{P}_i$ is the $i^{th}$ column of $\bold{P}$. Since all the elements of both $\bold{P}_i$ and $\bold{d}$ are non-negative, their inner product must also be non-negative. Since +\begin{align*} + \bold{d}^{T}\bold{P}\bold{1} &= \bold{d}^{T}\bold{1} \\ + &= 1~, +\end{align*} +$\bold{p}$ satisfies that its elements are non-negative and sum to 1 making it a member of $\Delta^n$ +\end{proof} + %%% Local Variables: %%% coding: utf-8 %%% mode: LaTeX