Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions Probability/Probability/Matrix.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import Probability.Probability.Prelude

import Probability.Probability.Defs
import Mathlib.Data.Matrix.Mul
import Mathlib.LinearAlgebra.Matrix.DotProduct

namespace Matrix

section ProbabilityMatrix

structure ProbabilityMatrix (n : ℕ) : Type where
--(n x n) Matrix where each row is a probability distribution
P : (Matrix (Fin n) (Fin n) ℚ)
row_sum : P *ᵥ 1 = 1
nneg : ∀ i j : Fin n, P i j ≥ 0

variable (n : ℕ) (Prob : ProbabilityMatrix n) (μ : Findist n) (r : Fin n → ℚ) (γ : ℚ)


theorem dist_prob_product_nneg : μ.p ᵥ* (Prob.P) ≥ 0 := by
unfold vecMul
intro j
apply dotProduct_nonneg_of_nonneg
exact μ.nneg
exact fun i => Prob.nneg i j

theorem dist_prob_product_sum : μ.p ᵥ* (Prob.P) ⬝ᵥ 1 = 1 := by
rw [← dotProduct_mulVec]
calc μ.p ⬝ᵥ Prob.P *ᵥ 1 = μ.p ⬝ᵥ 1 := by rw[Prob.row_sum]
_ = 1 ⬝ᵥ μ.p := by rw[dotProduct_comm]
_ = 1 := by rw[μ.prob]

end ProbabilityMatrix

section RewardProcess

--Discounted Markov Reward Process Definition
structure DMRP (n : ℕ) : Type where
r : Fin n → ℚ --rewards
Prob : ProbabilityMatrix n --transitions
γ : ℚ --discount
discount_in_range : 0 ≤ γ ∧ γ < 1

variable (n : ℕ) (Proc : DMRP n) (u : (Fin n) → ℚ) (v : (Fin n) → ℚ)

def bellman_backup (v : Fin n → ℚ) : Fin n → ℚ := Proc.r + Proc.γ • Proc.Prob.P *ᵥ v

notation "𝔹["v "//" Proc "]" => bellman_backup Proc v

-- Looking for norm in mathlib
theorem bellman_backup_contraction : 1 = 1 := by sorry

end RewardProcess
26 changes: 25 additions & 1 deletion blueprint/src/content.tex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
% If you want to split the blueprint content into several files then
% the current file can be a simple sequence of \input. Otherwise It
% can start with a \section or \chapter for instance.

\newcommand{\id}{\operatorname{id}}


Expand Down Expand Up @@ -1290,6 +1289,31 @@ \subsection{Evaluation}
%%\lean{MDPs.v_dp_π}\leanok
\end{definition}

\section{Probability Maticies}


\begin{definition}
A probability matrix $\bold{P}$ is defined as an $n \times n$ matrix with the properties:
\begin{align*}
&\bold{P}_{ij} \geq 0 \quad \forall i,j \in \{1,2,\dots,n\} \\
&\bold{P}\bold{1} = \bold{1}
\end{align*}
\end{definition}
\begin{theorem}
Let $\bold{d} \in \Delta^n$. Then
\[
\bold{d}^{T}\bold{P} \in \Delta^n
\]
\end{theorem}
\begin{proof}
Let $i \in \{1,2,\dots,n\}$ and $\bold{p} = \bold{d}^{T}\bold{P} $. Then $p_i = \bold{d}^{T}\bold{P}_i$ where $\bold{P}_i$ is the $i^{th}$ column of $\bold{P}$. Since all the elements of both $\bold{P}_i$ and $\bold{d}$ are non-negative, their inner product must also be non-negative. Since
\begin{align*}
\bold{d}^{T}\bold{P}\bold{1} &= \bold{d}^{T}\bold{1} \\
&= 1~,
\end{align*}
$\bold{p}$ satisfies that its elements are non-negative and sum to 1 making it a member of $\Delta^n$
\end{proof}

%%% Local Variables:
%%% coding: utf-8
%%% mode: LaTeX
Expand Down