diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..ef2e5af
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,16 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      # Check for updates to GitHub Actions every day
+      interval: "daily"
+      time: "09:00"
+      timezone: "UTC"
+    assignees:
+      - "jimboid"
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 0000000..4b98457
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,211 @@
+# CI/CD for the workshop container image:
+#   build -> per-architecture images pushed to GHCR by digest
+#   merge -> digests combined into one multi-arch manifest tagged `dev`
+#   tests -> workshop notebooks executed inside the `dev` image
+#   tags  -> `dev` re-tagged as `latest` and as the release tag (not on PRs)
+name: ci/cd
+on:
+  pull_request:
+  repository_dispatch:
+    types: [build]
+  workflow_dispatch:
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    # Each platform is built on a runner of the matching architecture.
+    runs-on: ${{ matrix.platform == 'linux/amd64' && 'ubuntu-24.04' || matrix.platform == 'linux/arm64' && 'ubuntu-24.04-arm' }}
+    name: build ${{ matrix.platform }}
+    outputs:
+      tag: ${{ steps.envvars.outputs.tag }}
+    steps:
+      - name: checkout
+        uses: actions/checkout@v5.0.0
+
+      - name: Prepare env
+        id: envvars
+        env:
+          # Passed through the environment instead of being interpolated into
+          # the script, so an absent or unusual payload value cannot break (or
+          # inject into) the shell code below.
+          PAYLOAD_TAG: ${{ github.event.client_payload.tag }}
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> "$GITHUB_ENV"
+          # Use the dispatched tag when one was supplied; otherwise fall back
+          # to today's date.
+          if [ -n "$PAYLOAD_TAG" ] && [ "$PAYLOAD_TAG" != 'null' ]; then
+            echo "tag=$PAYLOAD_TAG" >> "$GITHUB_OUTPUT"
+          else
+            echo "tag=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Metadata
+        id: meta
+        uses: docker/metadata-action@v5.8.0
+        with:
+          images: ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}
+
+      - name: Authenticate with GHCR
+        id: auth
+        uses: docker/login-action@v3.5.0
+        with:
+          registry: ghcr.io
+          username: ${{github.actor}}
+          password: ${{secrets.BUILD_TOKEN}}
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3.11.1
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6.18.0
+        with:
+          file: ./docker/Dockerfile
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          tags: ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}
+          outputs: type=image,push-by-digest=true,name-canonical=true,push=true
+
+      # The digest is recorded as an empty file named after it; the merge job
+      # rebuilds the image references from these file names.
+      - name: Export digest
+        run: |
+          mkdir -p ${{ runner.temp }}/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "${{ runner.temp }}/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4.6.2
+        with:
+          name: digests-${{ env.PLATFORM_PAIR }}
+          path: ${{ runner.temp }}/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    runs-on: ubuntu-24.04
+    name: merge into multiarch manifest
+    needs:
+      - build
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v5.0.0
+        with:
+          path: ${{ runner.temp }}/digests
+          pattern: digests-*
+          merge-multiple: true
+
+      - name: Authenticate with GHCR
+        id: auth
+        uses: docker/login-action@v3.5.0
+        with:
+          registry: ghcr.io
+          username: ${{github.actor}}
+          password: ${{secrets.BUILD_TOKEN}}
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3.11.1
+
+      - name: Metadata
+        id: meta
+        uses: docker/metadata-action@v5.8.0
+        with:
+          images: ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}
+          tags: dev
+
+      # Supplies steps.timestamp.outputs.timestamp, which the
+      # org.opencontainers.image.created annotation below reads.
+      - name: Timestamp
+        id: timestamp
+        run: echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> "$GITHUB_OUTPUT"
+
+      # Allowed to fail: the next step retries without annotations.
+      - name: Create manifest list and push
+        id: annotate
+        continue-on-error: true
+        working-directory: ${{ runner.temp }}/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            --annotation='index:org.opencontainers.image.description=${{ github.event.repository.description }}' \
+            --annotation='index:org.opencontainers.image.licenses=MIT' \
+            --annotation='index:org.opencontainers.image.created=${{ steps.timestamp.outputs.timestamp }}' \
+            --annotation='index:org.opencontainers.image.url=${{ github.event.repository.url }}' \
+            --annotation='index:org.opencontainers.image.source=${{ github.event.repository.url }}' \
+            $(printf 'ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}@sha256:%s ' *)
+
+      - name: Create manifest list and push without annotations
+        if: steps.annotate.outcome == 'failure'
+        working-directory: ${{ runner.temp }}/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf 'ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}:dev
+
+  tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+    # Runner images pinned to the same versions the build job uses.
+    runs-on: ${{ matrix.platform == 'linux/amd64' && 'ubuntu-24.04' || matrix.platform == 'linux/arm64' && 'ubuntu-24.04-arm' }}
+    name: testing on ${{ matrix.platform }}
+    timeout-minutes: 360
+    needs:
+      - build
+      - merge
+    steps:
+
+      # NOTE(review): pytest takes its targets from arguments / the working
+      # directory, not stdin, so the `find ... |` prefix looks redundant - confirm.
+      - name: Test notebooks
+        shell: bash
+        run: |
+          docker run -t ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}:dev bash -c "\
+            pip install pytest nbmake; \
+            find . -name '*.ipynb' | pytest --nbmake --nbmake-timeout=3600 --ignore=answers; "
+
+  tags:
+    runs-on: ubuntu-24.04
+    if: github.event_name != 'pull_request'
+    name: add tags
+    needs:
+      - build
+      - tests
+    steps:
+      - name: Authenticate with GHCR
+        id: auth
+        uses: docker/login-action@v3.5.0
+        with:
+          registry: "ghcr.io"
+          username: ${{github.actor}}
+          password: ${{secrets.BUILD_TOKEN}}
+
+      - name: tag release versions
+        shell: bash
+        run: |
+          docker buildx imagetools create \
+            --tag ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}:latest \
+            --tag ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}:${{ needs.build.outputs.tag }} \
+            ghcr.io/${{ vars.ORG_REPO }}/${{ github.event.repository.name }}:dev
+
+      # Sends a `build` repository_dispatch event carrying this repo's name and
+      # the new tag to jimboid/biosim-workshops-dash.
+      - name: Post version update to dash
+        uses: peter-evans/repository-dispatch@v3.0.0
+        with:
+          token: ${{ secrets.BUILD_TOKEN }}
+          repository: jimboid/biosim-workshops-dash
+          event-type: build
+          client-payload: '{"repo": "${{ github.event.repository.name }}", "tag": "${{ needs.build.outputs.tag }}"}'
diff --git a/README.md b/README.md
index 02ef490..b982c83 100644
--- a/README.md
+++ b/README.md
@@ -1,143 +1,17 @@
-## Aimed at:
-Anyone interested in using Python for Biomodelling. The material in
-this workshop will help you get the most out of the other workshops
-run during the week.
+# CCPBioSim Basic Python Workshop
 
-## Requirements:
-Basic knowledge of Python e.g. as in [https://chryswoods.com/beginning_python](https://chryswoods.com/beginning_python)
+[![build](https://github.com/ccpbiosim/python-workshop/actions/workflows/build.yaml/badge.svg?branch=main)](https://github.com/ccpbiosim/python-workshop/actions/workflows/build.yaml)
 
-## Abstract:
-This workshop will introduce more intermediate features of Python that
-are useful for biomolecular modellers. This will include the use of
-Jupyter notebooks, how to write Python functions and classes, and
-how to properly structure, document and test code. The second
-part will introduce you to data analysis tools such as Pandas,
-NumPy and MatplotLib.
+## Docker
 
-## Training Material
+This container is derived from the CCPBioSim JupyterHub image. This container
+adds the necessary software packages and notebook content to form a deployable
+course container. The source content for this course can be found at
+https://github.com/CCPBioSim/python-and-data-workshop
 
-The workshop consists of a series of Jupyter notebooks. These are available
-below, and can be run using the
-workshop Jupyter server.
+## How to Use
 
-Once you have started the server, navigate to the `python_and_data` directory
-and you will find all workshop material there.
+In our containers we are using the JupyterHub default port 8888, so you should
+forward this port when deploying locally:
 
-The workshops are numbered sequentially from `01_jupyter_howto.ipynb` to
-`17_regular_expressions.ipynb`. They cover a variety of useful Python topics,
-and are *mostly* independent. Feel free to go through them in the order you
-prefer, and to skip topics that you feel you are already comfortable with.
-
-There are exercises in many of the topics. You can find answers in the
-equivalent notebook in the `answers` directory.
-
-Below is a summary of each topic, together with links to view html versions
-of the notebooks the their answers, and to download the notebooks.
-
-## Contents
-
-### [01_jupyter_howto.ipynb](html/01_jupyter_howto.html) ([answers](html/answers/01_jupyter_howto.html))
-
-Introduction to Jupyter notebooks, including how to use the interface,
-how to view molecules, draw graphs, download files, and start a bash
-terminal.
- -[download](01_jupyter_howto.ipynb) | [download answers](answers/01_jupyter_howto.ipynb) - -### [02_lists.ipynb](html/02_lists.html) ([answers](html/answers/02_lists.html)) - -Learn how to use Python Lists - -[download](02_lists.ipynb) | [download answers](answers/02_lists.ipynb) - -### [03_dictionaries.ipynb](html/03_dictionaries.html) ([answers](html/answers/03_dictionaries.html)) - -Learn how to use Python dictionaries - -[download](03_dictionaries.ipynb) | [download answers](answers/03_dictionaries.ipynb) - -### [04_functions.ipynb](html/04_functions.html) ([answers](html/answers/04_functions.html)) - -Learn how to write a function in Python - -[download](04_functions.ipynb) | [download answers](answers/04_functions.ipynb) - -### [05_objects.ipynb](html/05_objects.html) ([answers](html/answers/05_objects.html)) - -Learn about objects and object orientated programming - -[download](05_objects.ipynb) | [download answers](answers/05_objects.ipynb) - -### [06_classes.ipynb](html/06_classes.html) ([answers](html/answers/06_classes.html)) - -Learn how to write your own Python classes - -[download](06_classes.ipynb) | [download answers](answers/06_classes.ipynb) - -### [07_documentation.ipynb](html/07_documentation.html) ([answers](html/answers/07_documentation.html)) - -Learn how to add documentation to your code - -[download](07_documentation.ipynb) | [download answers](answers/07_documentation.ipynb) - -### [08_class_documentation.ipynb](html/08_class_documentation.html) ([answers](html/answers/08_class_documentation.html)) - -Learn how to document Python classes, and protect hidden (private) functions -and data from view. - -[download](08_class_documentation.ipynb) | [download answers](answers/08_class_documentation.ipynb) - -### [09_exceptions.ipynb](html/09_exceptions.html) ([answers](html/answers/09_exceptions.html)) - -Learn about exceptions, and how they can be used to signify errors. 
- -[download](09_exceptions.ipynb) | [download answers](answers/09_exceptions.ipynb) - -### [10_error_handling.ipynb](html/10_error_handling.html) ([answers](html/answers/10_error_handling.html)) - -Learn how to handle errors by catching exceptions - -[download](10_error_handling.ipynb) | [download answers](answers/10_error_handling.ipynb) - -### [11_modules.ipynb](html/11_modules.html) ([answers](html/answers/11_modules.html)) - -Learn how to package and share your code as a module - -[download](11_modules.ipynb) | [download answers](answers/11_modules.ipynb) - -### [12_pandas.ipynb](html/12_pandas.html) ([answers](html/answers/12_pandas.html)) - -Learn how to use the pandas library for data analysis - -[download](12_pandas.ipynb) | [download answers](answers/12_pandas.ipynb) - -### [13_basic_numpy.ipynb](html/13_basic_numpy.html) ([answers](html/answers/13_basic_numpy.html)) - -Learn how to use the NumPy library for numeric calculation - -[download](13_basic_numpy.ipynb) | [download answers](answers/13_basic_numpy.ipynb) - -### [14_more_numpy.ipynb](html/14_more_numpy.html) ([answers](html/answers/14_more_numpy.html)) - -Learn more about how to use NumPy, including understanding copies and views - -[download](14_more_numpy.ipynb) | [download answers](answers/14_more_numpy.ipynb) - -### [15_matplotlib.ipynb](html/15_matplotlib.html) ([answers](html/answers/15_matplotlib.html)) - -Learn how to use the pandas with the MatPlotLib library to draw graphs - -[download](15_matplotlib.ipynb) | [download answers](answers/15_matplotlib.ipynb) - -### [16_viewing_molecules.ipynb](html/16_viewing_molecules.html) ([answers](html/answers/16_viewing_molecules.html)) - -Learn how to use the nglview and BioSimSpace libraries to create 3D -views of molecules. 
-
-[download](16_viewing_molecules.ipynb) | [download answers](answers/16_viewing_molecules.ipynb)
-
-### [17_regular_expressions.ipynb](html/17_regular_expressions.html) ([answers](html/answers/17_regular_expressions.html))
-
-Learn how to understand and write regular expressions for text/pattern matching.
-
-[download](17_regular_expressions.ipynb) | [download answers](answers/17_regular_expressions.ipynb)
+    docker run -p 8888:8888 ghcr.io/jimboid/biosim-python-workshop:latest
diff --git a/_config.yml b/_config.yml
deleted file mode 100644
index cd6e0ea..0000000
--- a/_config.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-theme: jekyll-theme-slate
-title: [Python for Biomolecular Modellers]
-description: [CCP-BioSim Workshop]
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..d65e09f
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,33 @@
+# Start with BioSim base image.
+ARG BASE_IMAGE=latest
+FROM ghcr.io/ccpbiosim/jupyterhub-base:$BASE_IMAGE
+
+LABEL maintainer="James Gebbie-Rayet "
+LABEL org.opencontainers.image.source=https://github.com/jimboid/biosim-python-workshop
+LABEL org.opencontainers.image.description="A container environment for the ccpbiosim workshop on Python."
+LABEL org.opencontainers.image.licenses=MIT
+
+# Switch to jovyan user.
+USER $NB_USER
+WORKDIR $HOME
+
+# Install workshop deps; package caches are dropped in the same layer so they
+# do not end up in the image.
+RUN conda install -y matplotlib numpy pandas nglview ipywidgets && \
+    conda clean -afy
+RUN pip install --no-cache-dir mdtraj
+
+# Get workshop files and move them to jovyan directory, then remove the
+# repository housekeeping files that the workshop does not need.
+COPY --chown=1000:100 . .
+RUN rm -rf _config.yml AUTHORS README.md docker .git .github
+
+# Copy lab workspace
+COPY --chown=1000:100 docker/default-37a8.jupyterlab-workspace /home/jovyan/.jupyter/lab/workspaces/default-37a8.jupyterlab-workspace
+
+# Jupyter configuration needed for remote deployment. It is enabled here;
+# comment this line out for a local-only deployment.
+COPY docker/jupyter_notebook_config.py /etc/jupyter/
+
+# Always finish with non-root user as a precaution.
+USER $NB_USER
diff --git a/docker/default-37a8.jupyterlab-workspace b/docker/default-37a8.jupyterlab-workspace
new file mode 100644
index 0000000..d97d437
--- /dev/null
+++ b/docker/default-37a8.jupyterlab-workspace
@@ -0,0 +1 @@
+{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:01_jupyter_howto.ipynb"]},"current":"notebook:01_jupyter_howto.ipynb"},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector"]}},"notebook:01_jupyter_howto.ipynb":{"data":{"path":"01_jupyter_howto.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}
diff --git a/docker/jupyter_notebook_config.py b/docker/jupyter_notebook_config.py
new file mode 100644
index 0000000..a3e5d82
--- /dev/null
+++ b/docker/jupyter_notebook_config.py
@@ -0,0 +1,2 @@
+# Select tmpauthenticator's TmpAuthenticator as the authenticator class.
+c.JupyterHub.authenticator_class = 'tmpauthenticator.TmpAuthenticator'
diff --git a/html/01_jupyter_howto.html b/html/01_jupyter_howto.html
deleted file mode 100644
index a6e5a0b..0000000
--- a/html/01_jupyter_howto.html
+++ /dev/null
@@ -1,12303 +0,0 @@
- - - -01_jupyter_howto - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

What is Jupyter and how do you use it?

Project Jupyter exists to develop software and standards for interactive computing. Jupyter, which you are using now, provides an interactive notebook which runs on a cloud-server, and with which you interact using a web browser.

-

As installed, Jupyter provides two things:

-
    -
  • An interactive Python notebook, which you are using now (this is an interactive notebook!)
  • -
  • An interactive bash terminal (we will use this later)
  • -
-

What is an interactive Python notebook? It is a notebook that mixes documentation (like this!) with Python code (like below), with the output of that code.

-

For example, below is the Python code to print "Hello World". Click on the below code and then press "SHIFT+Return". You should see that the code is run, and below "Hello World" will be printed.

- -
-
-
-
-
-
In [ ]:
-
-
-
print("Hello World")
-
- -
-
-
- -
-
-
-
-
-
-

The above code is interactive because it runs when you pressed "SHIFT+Return". It is also interactive because you can change it. For example, click again on the code and change "Hello World" to "Hello Jupyter". Press "SHIFT+Return" again and you should see "Hello Jupyter" printed.

-

A code cell can have as much or little Python in it as you want. The below cell defines a function and then runs it in a loop. What do you think will be printed out when you select it and press "SHIFT+Return"? Have a go. Are you right?

- -
-
-
-
-
-
In [ ]:
-
-
-
def countDown(start):
-    for i in range(start, 0, -1):
-        print("%d..." % i)
-    print("Lift off!")
-    
-countDown(10)
-
- -
-
-
- -
-
-
-
-
-
-

Each cell can use the variables and functions defined in previous cells. For example, the next cell sets the value of the variable x to 5, while the following cell then uses that to call the countDown function.

- -
-
-
-
-
-
In [ ]:
-
-
-
x = 5
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
countDown(x)
-
- -
-
-
- -
-
-
-
-
-
-

You can go back up and edit the value of x. For example, set x to 7 and press SHIFT+Return. This has changed x. However, it doesn't re-run any code that depends on x. That is for you to do. You need to click on the next cell countDown(x) and press SHIFT+Return again. You should then see that the code is run and updated.

-

Because you can change any cell at any time, a notebook can get into a confused state. If we change x again to 21, then the cell countDown(x) is now out of date, as it depends on the old value of x. So, how can we know what state each cell is in? The state is based on the order in which cells are executed. This is shown by the little number next to each cell, e.g. see below.

-

Image showing Jupyter state

-

The number "10" shows that the line x = 21 was the 10th cell executed by Python, while the line countDown(x) was the 9th cell executed. As countDown(x) was executed before x = 21, it was not affected by x = 21. If you want it to be affected, you need to execute that cell again (so it is now the 11th cell executed), e.g.

-

Image showing updated Jupyter state

-

If you find yourself getting confused, then click "Kernel | Restart & Clear Output" from the menu above to clear all outputs, clear the state, and reset back to a clean notebook.

-

Image showing clear and restart

- -
-
-
-
-
-
-
-
-

Interactive Graphics

In addition to mixing documentation with interactive Python, the notebook also allows Python modules to embed interactive graphics. For example, the BioSimSpace Project provides an easy wrapper around the Python molecular viewer nglview. This can be used to view molecules from within a notebook.

-

Execute the cell below by selecting it and pressing SHIFT+Return. This will import the viewMolecules function from BioSimSpace, which will load the molecules contained in the file data/complex.pdb and will display it below.

- -
-
-
-
-
-
In [ ]:
-
-
-
from BioSimSpace import viewMolecules
-v = viewMolecules("data/complex.pdb")
-
- -
-
-
- -
-
-
-
-
-
-

You can rotate and interact with molecules once they have loaded (the "[*]" next to a cell indicates that it is processing. It will turn into a number once processing is complete).

-

You can change molecular representations and interact with the 3D molecule view. Later on we will go through some examples showing what you can do and how you can select parts of the molecule (or click here if you want to jump ahead and learn now).

-

In addition to viewing molecules, you can also draw graphs. This is achieved using a combination of the matplotlib, numpy and pandas modules. For example, the below code draws a sine and cosine curve.

- -
-
-
-
-
-
In [ ]:
-
-
-
# First, import pandas, numpy and matplotlib
-import pandas as pd
-from pandas import Series, DataFrame
-import numpy as np
-import matplotlib.pyplot as plt
-%config InlineBackend.figure_format = 'svg'   # helps make things look better in Jupyter :-)
-
-# now define a sine and cosine curve
-X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
-data = {'cos': np.cos(X), 'sin': np.sin(X)}
-trig = DataFrame(index=X, data=data)
-
-trig.plot()
-
- -
-
-
- -
-
-
-
-
-
-

Later on we will go through pandas, matplotlib and numpy and you will learn how to draw lots of different and interesting graphs (or click the links to jump ahead and learn now).

- -
-
-
-
-
-
-
-
-

Mixing documentation and code

In addition to interactivity, the real benefit of a Jupyter notebook is that it encourages you to include lots of documentation with your code. In effect, a notebook is a combination of code, documentation, and (once it has been run) analysis and data.

-

You can create documentation by changing the type of a cell. The type of a cell is indicated in the drop-down at the top, under the menu. If you click on the graph above, you will see that the type of a code cell is "Code".

-

Image showing code type

-

If you now click on this documentation, you should see the cell type change to "Markdown"

-

Image showing markdown type

- -
-
-
-
-
-
-
-
-

Creating your own cells

You can create your own cells in two different ways;

-
    -
  • either by pressing SHIFT+Return on the last cell (which creates a new cell at the bottom of the notebook),
  • -
  • or by clicking "Insert | Cell Above" or "Insert | Cell Below" from the menu to insert a new cell above or below the currently selected cell.
  • -
-

You can choose whether this should be a code or documentation (markdown) cell by selecting the appropriate value from the drop-down at the top.

-

Code cells can contain any valid Python. Documentation (markdown) cells can contain any text. The text can be formatted, using the markdown standard.

-

Have a go by creating some new documentation and code cells below. If you want some inspiration for documentation, take a look at the documentation cells above by double-clicking on them. You should see how I have inserted images, lists, headings etc.

-

You can then render the documentation using SHIFT+Return (just like running a code cell)

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Controlling the Python kernel

The Python process that is interpreting and executing your Python code is referred to as the "kernel" of the notebook (hence the "Kernel" menu above). When you execute a code cell it sends the code to the Python kernel, and then waits for the kernel to send a result back. The Python kernel can only execute one thing at a time. This means that, for slow functions, you could be executing cells too quickly and leave the Python kernel behind.

-

For example, here is a function that fakes a slow function as it goes to sleep for the specified number of seconds.

- -
-
-
-
-
-
In [ ]:
-
-
-
def sleep(n):
-    import os
-    os.system("sleep %d" % n)
-
- -
-
-
- -
-
-
-
-
-
-

Below we will have three code cells that each sleep for a long time. Execute all three quickly one after another...

- -
-
-
-
-
-
In [ ]:
-
-
-
sleep(30)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
sleep(30)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
sleep(30)
-
- -
-
-
- -
-
-
-
-
-
-

You should see that there is a [*] next to each code cell. This is because they are all waiting for the Python kernel to finish executing and return.

-

Jupyter is sleeping

-

If you wait you will see each sleep finishing and control then passing to the next.

-

Sometimes you don't want to be patient, and you want to stop the Python kernel from running a function. To stop (interrupt) the kernel, either click the "Stop" icon at the top of the screen, or select "Kernel | Interrupt" from the menu.

-

Jupyter stop button

-

Other useful buttons up there are "restart the kernel" (same as the menu Kernel | Restart & Clear Output) and the arrows that can move the selected cell up and down the notebook.

-

Have a play. Explore the buttons and menu items. Take a look at the "Help" in the menu. Remember that, if everything goes wrong, you can always click "Stop" or select "Kernel | Restart & Clear Output" from the menu.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Running external programs in a code cell

You can do more than just execute Python in the code cells. You can run any command you want! Just put an exclamation mark first, and then type the command normally as you would in a terminal. For example, the below cell is running ls

- -
-
-
-
-
-
In [ ]:
-
-
-
! ls
-
- -
-
-
- -
-
-
-
-
-
-

You can run any command that you want, exactly as you would typing in a terminal. For example, let's use cat to read the LICENSE file...

- -
-
-
-
-
-
In [ ]:
-
-
-
! cat LICENSE
-
- -
-
-
- -
-
-
-
-
-
-

You can work with directories as well. For example, here we will copy the dioxin.pdb file from data into the current directory and will then view it using viewMolecules...

- -
-
-
-
-
-
In [ ]:
-
-
-
! cp data/dioxin.pdb ./
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
from BioSimSpace import viewMolecules  # importing again in case we restarted the kernel
-viewMolecules("dioxin.pdb")
-
- -
-
-
- -
-
-
-
-
-
-

Saving the notebook

At the top of the screen is the name of the notebook, and its current save state.

-

Jupyter file info

-

The current save state tells you when the last checkpoint was saved, and whether or not the notebook contains any unsaved changes. Checkpoints provide points in the notebook that you can refer back to, using the menu item "File | Revert to Checkpoint". You are always able to revert to the last checkpoint, meaning that if you make a mistake, you can go back to a previous safe version.

-

If you click "File | Save and Checkpoint", or click the "Save" icon (floppy disk icon on the far left), then the current state of the notebook will be saved. This includes not just the code and documentation you have written, but also all of the outputs and graphs that you have created. This is really useful, as it lets you save your work, and then come back later.

-

For example, why don't you save now, and then close this window and then open the notebook again? You should see that everything you have done, and everything that was output is restored. Did it work?

-

The notebook was saved to a file called 01_jupyter_howto.ipynb. The .ipynb signifies that the file is a Jupyter notebook (formerly called an interactive python notebook). You can change the name by clicking on the name of the notebook above, and then choosing a different name in the window that opens.

-

Alternatively, you can save a copy of this notebook under a different name by clicking "File | Make a Copy..." in the menu. You can then copy this file or send it to someone else. As a notebook contains the code, data, outputs and analysis, sharing notebooks is the closest thing we have to interactive papers. This is a really powerful concept and, I believe, will fundamentally change how we report and share the results of computational and data analysis in science.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

The Jupyter Hub

When you first logged into this website you launched this notebook by clicking on its name in the Jupyter Hub. This was the page that looked like a file browser and that (hopefully) is still open in one of the tabs in your browser. The Jupyter Hub is not just a file browser. It is the gateway to do a lot more than just interact with a single notebook.

-

The first thing to note is that this Jupyter Hub is running on the cloud and consumes resources. It is important that you click "Logout" (top right) when you have finished, as this will free up the hub to be used by someone else. There is a timer that will automatically log you out 40 minutes after your web browser is closed. However, please remember to log out if you can.

-

Jupyter hub

-

The main view of the hub is the file browser that shows all of your files. You can navigate through the files and directories like any normal file browser. For example, you can see above the 01_jupyter_howto.ipynb file that contains this notebook. You should see that the file has a green icon next to it, as well as a Running indicator on the right. This indicates that the notebook is currently running.

-

Running notebooks consume cloud resources, which are limited. You can only have 1.8 GB of data in memory and have limited CPU. You should thus shutdown any running notebooks that you aren't using any more. To do this, click the checkbox to the left of the notebook filename. This will bring up some buttons at the top of the screen. Click the "Shutdown" button to shut down the notebook.

-

Shutdown running kernels

-

Alternatively, you can go to the "Running" tab to see everything of yours that is currently running in the hub. Click the "Shutdown" button on the right for anything that you aren't using and want to shut down.

-

Running tab

-

Back in the "Files" tab, clicking the checkbox next to a file brings up buttons that let you download files. You can use the hub to download files by selecting them and clicking the "Download" button that will appear. Other buttons will appear depending on the type of file, e.g. "Duplicate", "View", "Edit" etc. The bin icon will delete the file.

-

Download a file

-

You can create new notebooks or files by clicking on the "New" button on the right. This will open a drop-down menu for you to select the type of thing to create.

-

New dropdown

-

As well as letting you create a new jupyter notebook (by clicking "New | Python3"), you can also create new text files ("New | Text File") or new folders ("New | Folder"). Most interestingly, you can also create a new bash terminal. If you click "New | Terminal" it will open a new tab in which you will have a fully functional bash terminal shell. This is running on the same cloud server as your notebook. It is 100% completely a fully functional terminal within your web browser. You could even use it to run the normal command-line python ;-)

-

Bash in the browser

- -
-
-
-
-
-
-
-
-

The cloud server on which this is running comes with lots of useful software that you can run from within the bash shell. For example you have;

-
    -
  • update_workshops : This command will update all of the software and workshop material to the latest version. Useful if you delete anything by accident or the tutor needs to make a last-minute fix
  • -
  • git : Git is installed. The workshop material is downloaded and updated from git repositories, e.g. https://github.com/ccpbiosim/python_and_data. You could also use git to push things back to the cloud, e.g. if you want to download lots of files.
  • -
  • anaconda python3 : A full anaconda python3 is installed, with pandas, matplotlib, numpy etc. etc.
  • -
  • wget : This tool lets you quickly download (or should that be sideload?) files onto the cloud server
  • -
  • top : Process monitor. You can use this to see which processes are consuming lots of resources. Any process that uses more than 1.8 GB of memory will automatically be killed by the cloud server. You can kill them yourself using the kill command.
  • -
  • tar and bzip2 : useful for packing/unpacking and compressing/uncompressing files
  • -
  • AmberTools : The full AmberTools suite is installed in directory $AMBERHOME
  • -
  • ProtoMS : The full ProtoMS package is installed in directory $PROTOMSHOME
  • -
  • wham and wham-2d : Tools used for weighted histogram analysis
  • -
  • Sire and BioSimSpace : The Sire and BioSimSpace python simulation framework and workflow node packages.
  • -
-

The cloud server was built with docker. If you want to download it yourself to run on your own computer at home after the workshop then please install docker and type

- -
docker run -it --rm -p 8888:8888 chryswoods/bss-workshop:latest
-

This will download and run the image from dockerhub and will make it available at the web address that will be printed to the screen (e.g. it will look something like http://localhost:8888/?token=641396480e6421eae8b18261d82a75f958fe166e1c8b20a8). Simply open that address in your browser :-). You can see and download the DockerFile used to generate this image from here.

-

Thanks to the Microsoft Azure Kubernetes Service on which this Jupyter image is running.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/02_lists.html b/html/02_lists.html deleted file mode 100644 index 718fac9..0000000 --- a/html/02_lists.html +++ /dev/null @@ -1,12258 +0,0 @@ - - - -02_lists - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Lists

Lists provide a simple way to hold a collection of values. You create a list using square brackets. For example, here we can create a list that contains the values "cat", "dog" and "horse"

-
a =[ "cat", "dog", "horse" ]
-
- -
-
-
-
-
-
In [ ]:
-
-
-
a = ["cat", "dog", "horse"]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
print(a)
-
- -
-
-
- -
-
-
-
-
-
-

You can access the items in the list by placing the index of the item in square brackets. The first item is at index 0

- -
-
-
-
-
-
In [ ]:
-
-
-
a[0]
-
- -
-
-
- -
-
-
-
-
-
-

The second item is at index 1, and the third item at index 2.

- -
-
-
-
-
-
In [ ]:
-
-
-
a[2]
-
- -
-
-
- -
-
-
-
-
-
-

What do you think will happen if we access the item at index 3?

- -
-
-
-
-
-
In [ ]:
-
-
-
a[3]
-
- -
-
-
- -
-
-
-
-
-
-

You can also access the items in the list from the back. What do you think is at index -1?

- -
-
-
-
-
-
In [ ]:
-
-
-
a[-1]
-
- -
-
-
- -
-
-
-
-
-
-

What about index -2, -3 or -4?

- -
-
-
-
-
-
In [ ]:
-
-
-
a[-3]
-
- -
-
-
- -
-
-
-
-
-
-

You can add items onto a list by using the .append function. You can find this using tab completion and Python help...

- -
-
-
-
-
-
In [ ]:
-
-
-
help(a.append)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a.append("fish")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a[3]
-
- -
-
-
- -
-
-
-
-
-
-

You can put whatever you want into a list, including other lists!

- -
-
-
-
-
-
In [ ]:
-
-
-
b = [ 42, 15, a, [7,8,9] ]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
b[3]
-
- -
-
-
- -
-
-
-
-
-
-

Putting lists inside lists allows for multidimensional lookup, e.g. can you work out why b[3][2] equals 9?

- -
-
-
-
-
-
In [ ]:
-
-
-
b[3][2]
-
- -
-
-
- -
-
-
-
-
-
-

You can loop over the items in a list using a for loop, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
for x in a:
-    print(x)
-
- -
-
-
- -
-
-
-
-
-
-

You can get the number of items in the list using the len function.

- -
-
-
-
-
-
In [ ]:
-
-
-
len(a)
-
- -
-
-
- -
-
-
-
-
-
-

You can use this as an alternative way of looping over the elements of a list

- -
-
-
-
-
-
In [ ]:
-
-
-
for i in range(0,len(a)):
-    print(a[i])
-
- -
-
-
- -
-
-
-
-
-
-

A string behaves like a list of letters. For example, if we have the string s = "Hello World", then s[0] is "H" and s[-1] is "d".

- -
-
-
-
-
-
In [ ]:
-
-
-
s = "Hello World"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
s[-1]
-
- -
-
-
- -
-
-
-
-
-
-

You can loop over every letter in a string in the same way that you can loop over every item in a list.

- -
-
-
-
-
-
In [ ]:
-
-
-
for letter in s:
-    print(letter)
-
- -
-
-
- -
-
-
-
-
-
-

Exercises

Exercise 1

Create two Python lists called a and b. Put into these lists the values [2, 4, 6, 8], and [10, 20, 30, 40]. -Check that a[2] equals 6 and b[-1] equals 40. (note that you will need to use the menu "Insert | Insert Cell Below" to insert more cells below to create space for your code)

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Now create a loop that loops over each item in a and b and that calculates and prints out the product a[i] * b[i].

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

Modify your code to create a list called c. Use the .append function to set c[i] = a[i] * b[i]. Check your code by making sure that c[-1] equals 320.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/03_dictionaries.html b/html/03_dictionaries.html deleted file mode 100644 index 574d0be..0000000 --- a/html/03_dictionaries.html +++ /dev/null @@ -1,12160 +0,0 @@ - - - -03_dictionaries - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Dictionaries

Dictionaries are another type of Python container. Instead of storing values by index, they store them associated with a key.

-

You create dictionaries using curly brackets, assigning values to their keys using a colon, e.g.

-
a = { "cat" : "mieow", "dog" : "woof", "horse" : "neigh" }
-
- -
-
-
-
-
-
In [ ]:
-
-
-
a = { "cat" : "mieow", "dog" : "woof", "horse" : "neigh"}
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a
-
- -
-
-
- -
-
-
-
-
-
-

You can look up values in the dictionary by placing the key in square brackets. For example, we can look up the value associated with the key "cat" using a["cat"].

- -
-
-
-
-
-
In [ ]:
-
-
-
a["cat"]
-
- -
-
-
- -
-
-
-
-
-
-

What happens if the key does not exist?

- -
-
-
-
-
-
In [ ]:
-
-
-
a['fish']
-
- -
-
-
- -
-
-
-
-
-
-

You insert items into the dictionary by assigning values to keys, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
a["fish"] = "bubble"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a
-
- -
-
-
- -
-
-
-
-
-
-

You can list all of the keys or values of a dictionary using the keys or values functions (which you can find using tab completion and Python help)

- -
-
-
-
-
-
In [ ]:
-
-
-
help(a.values)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a.keys()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a.values()
-
- -
-
-
- -
-
-
-
-
-
-

You can loop over the dictionary by looping over the keys and looking up the values in a for loop, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
for key in a.keys():
-    print("A %s goes %s" % (key, a[key]))
-
- -
-
-
- -
-
-
-
-
-
-

You can put anything as a value into a dictionary, including other dictionaries and even lists. The keys should be either numbers or strings.

- -
-
-
-
-
-
In [ ]:
-
-
-
b = { "a" : ["aardvark", "anteater", "antelope"], "b" : ["badger", "beetle"], 26.5: a}
-
- -
-
-
- -
-
-
-
-
-
-

What do you think is at b["a"][-1]? What about b[26.5]["fish"]?

- -
-
-
-
-
-
In [ ]:
-
-
-
b[26.5]["fish"]
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Below you have a dictionary that contains the full mapping of every letter to its Morse-code equivalent.

- -
-
-
-
-
-
In [ ]:
-
-
-
letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

Use the morse code dictionary to look up the morse code for the letters "s" and "o". What is the morse code for "SOS" (the international emergency distress signal)?

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Here is a string that contains a message that must be converted to morse code. Write a loop that converts each letter into the morse code equivalent, and stores it into a list. Print the list out to see the full morse code message that must be sent. Note that you will need to use the .lower() function to get the lower case of capital letters.

- -
-
-
-
-
-
In [ ]:
-
-
-
message = "SOS We have hit an iceberg and need help quickly"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

The inverted form of a dictionary is one where the keys are now looked up by value. For example, the below code inverts letter_to_morse such that the morse code is the key, and the letter is the value.

- -
-
-
-
-
-
In [ ]:
-
-
-
morse_to_letter = {}
-for letter in letter_to_morse.keys():
-    morse_to_letter[ letter_to_morse[letter] ] = letter
-
- -
-
-
- -
-
-
-
-
-
-

Check that this code works by verifying that morse_to_letter["..."] equals "s".

-

Next, loop through the morse code message you created in exercise 2 and see if you can convert it back to english. Note that you can join a list of letters together into a string using the code "".join(letters).

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/04_functions.html b/html/04_functions.html deleted file mode 100644 index 50404ca..0000000 --- a/html/04_functions.html +++ /dev/null @@ -1,12228 +0,0 @@ - - - -04_functions - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Functions

Functions provide a way to package often-used code into reusable and easy to use components. For example, here is some code that multiplies together two lists

- -
-
-
-
-
-
In [ ]:
-
-
-
list1 = [2, 4, 6, 8]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
list2 = [10, 20, 30, 40]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
list3 = []
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
for x, y in zip(list1,list2):
-    list3.append(x * y)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
list3
-
- -
-
-
- -
-
-
-
-
-
-

We don't want to keep typing out the above code every time we want to multiply the numbers in two lists. Instead, we can collect that code together into a function

- -
-
-
-
-
-
In [ ]:
-
-
-
def multiply(a, b):
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
-
-
-

We can now call this function directly on our lists, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
list3 = multiply(list1, list2)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
list3
-
- -
-
-
- -
-
-
-
-
-
-

The function is called using its name, and passing in the values as two arguments, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
list4 = multiply( [1,2,3], [4,5,6] )
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
list4
-
- -
-
-
- -
-
-
-
-
-
-

The arguments are placed inside the round brackets. These are copied, in order, to the function. For example, [1,2,3] is copied into a, and [4,5,6] is copied as b. The code in the function is then executed. We can watch this code being run by adding in print statements, e.g.

-
def multiply(a, b):
-    print("a = %s" % a)
-    print("b = %s" % b)
-    c = []
-    for x,y in zip(a,b):
-        print("%s times %s equals %s" % (x,y,x*y))
-        c.append(x*y)
-    print("c = %s" % c)
-    return c
-
- -
-
-
-
-
-
-
-
-

You must pass the right number of arguments into a function. For example, this is what happens if you get the number of arguments wrong...

- -
-
-
-
-
-
In [ ]:
-
-
-
list5 = multiply(list1)
-
- -
-
-
- -
-
-
-
-
-
-

You can write functions that take as many (or as few) arguments as you want. For example, here is a function that takes no arguments, and then a function that takes lots

- -
-
-
-
-
-
In [ ]:
-
-
-
def func0():
-    return "no arguments to this function"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
def func1(a, b, c, d, e=5):
-    return a+b+c+d+e
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
func0()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
func1(1, 2, 3, 4, 5)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
func1(1, 2, 3, 4)
-
- -
-
-
- -
-
-
-
-
-
-

Note that with the last function we have set a default value of the argument e. This is given the value of 5 if it is not specified. This allows us to pass 4 arguments instead of 5. Changing the default value by editing the definition of the function above will thus change the output of func1 when it is called with only four arguments.

- -
-
-
-
-
-
-
-
-

Exercise

Here is the morse code dictionary from the last session, together with the code that converts a message from english into morse code.

- -
-
-
-
-
-
In [ ]:
-
-
-
letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
message = "SOS We have hit an iceberg and need help quickly"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
morse = []
-for letter in message:
-    morse.append( letter_to_morse[letter.lower()] )
-print(morse)
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

Create a function called encode that takes a message and returns the morse code equivalent. Test this function by encoding the message SOS We have hit an iceberg and need help quickly and check that you get the same result as in the last session. Now try using your function to encode other messages.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Using the answer from Exercise 3 in the dictionaries lesson, write a function called decode that converts a morse code message back to english. Check that you can decode the above morse code message back to english.

- -
-
-
-
-
-
In [ ]:
-
-
-
  
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

Below is a list of messages. Loop over the messages and check that decode( encode(message) ) equals the original message. Do any of the messages fail to encode and decode correctly? If so, why? How can your check be modified to account for the limitations of your encode and decode functions?

- -
-
-
-
-
-
In [ ]:
-
-
-
messages = [ "hello world", "this is a long message", "Oh no this may break", "This message is difficult to encode." ]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/05_objects.html b/html/05_objects.html deleted file mode 100644 index 6915e2c..0000000 --- a/html/05_objects.html +++ /dev/null @@ -1,12147 +0,0 @@ - - - -05_objects - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Objects

In the last session you learned how to package up useful code into functions. This is a really useful idea, as it lets you re-use useful code in your own scripts, and to then share useful code with other people.

-

However, it is normal for functions to rely on data. For example, consider the Morse code encode and decode functions in the last lesson. These only work because of the data contained in the letter_to_morse dictionary. The functions would break if anyone changes the data in this dictionary.

- -
-
-
-
-
-
In [ ]:
-
-
-
letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
def encode(message):
-    morse = []
-    for letter in message:
-        morse.append( letter_to_morse[letter.lower()] )
-    return morse
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
encode("Hello")
-
- -
-
-
- -
-
-
-
-
-
-

The above encode("Hello") has worked. However, if we change the data in letter_to_morse, e.g. swapping l from .-.. to -.--, then we get ['....', '.', '-.--', '-.--', '---'], which is wrong. We can make even larger changes, which would completely break the function...

- -
-
-
-
-
-
-
-
-

While such changes are easy to spot in this example, they become more difficult to find in larger programs. In addition, as you share code, you will find that people using your code will do weird things to the data on which it depends, which can introduce weird bugs and problems.

-

The solution is to package a function together with the data on which it depends into a single object. This idea is the foundation of object-oriented programming. To explore this, let us start with a simple example that packages the encode function together with the letter_to_morse dictionary on which it depends.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Morse:
-    def __init__(self):
-        self._letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-        
-    def encode(self, message):
-        morse = []
-        for letter in message:
-            morse.append( self._letter_to_morse[letter.lower()] )
-        return morse
-
- -
-
-
- -
-
-
-
-
-
-

Above, we have packaged the data (letter_to_morse) together with the encode function into what we call a class. A Class describes how data and functions are combined together. An instance of a class is called an object, which we can create by calling Morse().

- -
-
-
-
-
-
In [ ]:
-
-
-
m = Morse()
-
- -
-
-
- -
-
-
-
-
-
-

m is an object of the class Morse. It has its own copy of letter_to_morse within it, and its own copy of the encode function. We can call m's copy of the encode function by typing m.encode(...), e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
m.encode("Hello World")
-
- -
-
-
- -
-
-
-
-
-
-

To create a new class, you use the class keyword, followed by the name of your class. In this case, class Morse defines a new class called Morse. You then add a colon, and write all of the functions that should be part of the class indented below. At a minimum, you must define one function, called the constructor. This function has the signature def __init__(self, arguments...). The first argument, self, is a special variable that allows an object of the class to access the data that belongs to itself. It is the job of the constructor to set up that data. For example, let's now create a new class that provides a simple guessing game.

- -
-
-
-
-
-
In [ ]:
-
-
-
class GuessGame:
-    def __init__(self, secret):
-        self._secret = secret
-        
-    def guess(self, value):
-        if (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            print("Try again...")
-
- -
-
-
- -
-
-
-
-
-
-

In this class, the constructor __init__(self, secret) takes an extra argument after self. This argument is saved as the _secret variable that is part of the self of the object. Note that we always name variables that are part of a class with a leading underscore. We can construct different object instances of GuessGame that have different secrets, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
g1 = GuessGame("cat")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
g2 = GuessGame("dog")
-
- -
-
-
- -
-
-
-
-
-
-

Here, the self._secret for g1 equals "cat". The self._secret for g2 equals "dog".

-

When we call the function g1.guess(value), it compares value against self._secret for g1.

- -
-
-
-
-
-
In [ ]:
-
-
-
g1.guess("dog")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
g1.guess("cat")
-
- -
-
-
- -
-
-
-
-
-
-

When we call the function g2.guess(value) it compares value against self._secret for g2.

- -
-
-
-
-
-
In [ ]:
-
-
-
g2.guess("cat")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
g2.guess("dog")
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Edit the below GuessGame example so that it records how many unsuccessful guesses have been performed. Add a function called nGuesses() that returns the number of unsuccessful guesses. Once you have made the changes, check your class by creating an object of your class and using it to make some successful and unsuccessful guesses.

- -
-
-
-
-
-
In [ ]:
-
-
-
class GuessGame:
-    def __init__(self, secret):
-        self._secret = secret
-        
-    def guess(self, value):
-        if (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            print("Try again...")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Edit the constructor of your GuessGame class so that the user can optionally specify a maximum number of allowable guesses. If the maximum number of guesses is not supplied, then set the default value to 5.

-

Create a maxGuesses() function that returns the maximum number of allowable guesses.

-

Finally, edit the guess() function so that it will not let you make more than the maximum number of guesses (e.g. if the number of guesses exceeds the maximum number, then print out "Sorry, you have run out of guesses.").

-

Check that your code works by creating an object of GuessGame that only allows three guesses, and see what happens if you guess incorrectly more than three times.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/06_classes.html b/html/06_classes.html deleted file mode 100644 index a971ad0..0000000 --- a/html/06_classes.html +++ /dev/null @@ -1,12493 +0,0 @@ - - - -06_classes - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Classes

Classes allow you to define how to package data with functions to create objects. An object is an instance of a class, which contains its own data, and its own copy of functions that can operate on that data.

-

You use classes to define objects that represent the concepts and things that your program will work with. For example, if your program managed exam results of students, then you may create one class that represents an Exam, and another that represents a Student.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Exam:
-    def __init__(self, max_score=100):
-        self._max_score = max_score
-        self._actual_score = 0
-        
-    def percent(self):
-        return 100.0 * self._actual_score / self._max_score
-    
-    def setResult(self, score):
-        if (score < 0):
-            self._actual_score = 0
-        elif (score > self._max_score):
-            self._actual_score = self._max_score
-        else:
-            self._actual_score = score
-    
-    def grade(self):
-        if (self._actual_score == 0):
-            return "U"
-        elif (self.percent() > 90.0):
-            return "A"
-        elif (self.percent() > 80.0):
-            return "B"
-        elif (self.percent() > 70.0):
-            return "C"
-        else:
-            return "F"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
class Student:
-    def __init__(self):
-        self._exams = {}
-    
-    def addExam(self, name, exam):
-        self._exams[name] = exam
-        
-    def addResult(self, name, score):
-        self._exams[name].setResult(score)
-    
-    def result(self, exam):
-        return self._exams[exam].percent()
-    
-    def grade(self, exam):
-        return self._exams[exam].grade()
-    
-    def grades(self):
-        g = {}
-        for exam in self._exams.keys():
-            g[exam] = self.grade(exam)
-        return g
-
- -
-
-
- -
-
-
-
-
-
-

We can now create a student, and give them a set of exams that they need to complete.

- -
-
-
-
-
-
In [ ]:
-
-
-
s = Student()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
s.addExam( "maths", Exam(20) )
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
s.addExam( "chemistry", Exam(75) )
-
- -
-
-
- -
-
-
-
-
-
-

At this point, the student has not completed any exams, so the grades are all 'U'

- -
-
-
-
-
-
In [ ]:
-
-
-
s.grades()
-
- -
-
-
- -
-
-
-
-
-
-

However, we can now add the results...

- -
-
-
-
-
-
In [ ]:
-
-
-
s.addResult("maths", 15)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
s.addResult("chemistry", 62)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
s.grades()
-
- -
-
-
- -
-
-
-
-
-
-

Programming with classes makes the code easier to read, as the code more closely represents the concepts that make up the program. For example, here we have a class that represents a full school of students.

- -
-
-
-
-
-
In [ ]:
-
-
-
class School:
-    def __init__(self):
-        self._students = {}
-        self._exams = []
-
-    def addStudent(self, name):
-        self._students[name] = Student()
-
-    def addExam(self, exam, max_score):
-        self._exams.append(exam)
-        
-        for key in self._students.keys():
-            self._students[key].addExam(exam, Exam(max_score))
-    
-    def addResult(self, name, exam, score):
-        self._students[name].addResult(exam, score)
-        
-    def grades(self):
-        g = {}
-        for name in self._students.keys():
-            g[name] = self._students[name].grades()
-        return g
-
- -
-
-
- -
-
-
-
-
-
-

We can now create a whole school of students and manage the exams and results for all of them with some reasonably readable code :-)

- -
-
-
-
-
-
In [ ]:
-
-
-
school = School()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.addStudent("Charlie")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.addStudent("Matt")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.addStudent("James")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.addExam( "maths", 20 )
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.addExam( "physics", 50 )
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.addExam( "english literature", 30 )
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.grades()
-
- -
-
-
- -
-
-
-
-
-
-

We can now add in the results of the exams, which have been returned to us by the exam markers...

- -
-
-
-
-
-
In [ ]:
-
-
-
englit_results = { "Charlie" : 10, "Matt" : 25, "James" : 3 }
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
phys_results = { "Matt" : 48, "James" : 3 }
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
maths_results = { "James" : 20, "Matt" : 18, "Charlie" : 4 }
-
- -
-
-
- -
-
-
-
-
-
-

Indeed, we will do this by using a function...

- -
-
-
-
-
-
In [ ]:
-
-
-
def add_results(school, exam, results):
-    for student in results.keys():
-        school.addResult(student, exam, results[student])
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
add_results(school, "english literature", englit_results)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
add_results(school, "physics", phys_results)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
add_results(school, "maths", maths_results)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.grades()
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Here is a copy of the Morse class from the last section. Modify this class to add in a decode function that converts Morse code back to English. Check that this class works by seeing if m.decode( m.encode(message) ) == message.lower().

- -
-
-
-
-
-
In [ ]:
-
-
-
class Morse:
-    def __init__(self):
-        self._letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-        
-    def encode(self, message):
-        morse = []
-        for letter in message:
-            morse.append( self._letter_to_morse[letter.lower()] )
-        return morse
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Below is a copy of the School class, together with a copy of the code needed to populate an object of that class with students and exam results. Edit the School class to add in the following functions:

-
    -
  • .resits() : this should return the list of exams that each student should resit if they get an "F" or "U" grade.
  • -
  • .prizeStudent() : this should return the name of the student who scored the highest average percent across all of the exams.
  • -
  • .reviseCourse(threshold) : this should return the name of the exam that gets the lowest average score across all students, if the average score is below threshold.
  • -
-

Use these functions to find out which students need to resit which exams, which student should be awarded the annual school prize, and which courses should be revised as the average mark is less than 50%.

- -
-
-
-
-
-
In [ ]:
-
-
-
class School:
-    def __init__(self):
-        self._students = {}
-        self._exams = []
-
-    def addStudent(self, name):
-        self._students[name] = Student()
-
-    def addExam(self, exam, max_score):
-        self._exams.append(exam)
-        
-        for key in self._students.keys():
-            self._students[key].addExam(exam, Exam(max_score))
-    
-    def addResult(self, name, exam, score):
-        self._students[name].addResult(exam, score)
-        
-    def grades(self):
-        g = {}
-        for name in self._students.keys():
-            g[name] = self._students[name].grades()
-        return g
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
students = ["Charlie", "James", "Matt"]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
exams = { "maths" : 20, "physics" : 50, "english literature" : 30 }
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
results = { "maths" : { "James" : 20, "Matt" : 18, "Charlie" : 4 }, 
-            "physics" : { "Matt" : 48, "James" : 3 },
-            "english literature" : { "Charlie" : 10, "Matt" : 25, "James" : 3 } }
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school = School()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
for student in students:
-    school.addStudent(student)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
for exam in exams.keys():
-    school.addExam(exam, exams[exam])
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
for exam in results:
-    add_results(school, exam, results[exam])
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
school.grades()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/07_documentation.html b/html/07_documentation.html deleted file mode 100644 index faaccca..0000000 --- a/html/07_documentation.html +++ /dev/null @@ -1,12186 +0,0 @@ - - - -07_documentation - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Documentation

Python has great in-built documentation that is available via the help function. For example

- -
-
-
-
-
-
In [ ]:
-
-
-
l = ["cat", "dog", "fish"]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(l)
-
- -
-
-
- -
-
-
-
-
-
-

You can add similar documentation to the functions that you write. You do this by adding in a documentation string as the first string after defining the function e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
def multiply(a, b):
-    """This function returns the element-wise multiplication of the passed lists 'a' and 'b'"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
multiply( [1,2,3], [4,5,6] )
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(multiply)
-
- -
-
-
- -
-
-
-
-
-
-

The documentation string should be placed between two sets of triple quotes ("""). This is a convention that makes it easier to expand the documentation later, and that ensures that nothing you write in the documentation will be expanded or interpreted as Python.

-

Documentation should provide a brief, easy-to-understand description of what the function does. It should not give information that is obvious from reading the function signature. For example, this is a bad piece of documentation.

- -
-
-
-
-
-
In [ ]:
-
-
-
def multiply(a, b):
-    """function multiply(a,b) -> list"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(multiply)
-
- -
-
-
- -
-
-
-
-
-
-

It is much better to say what the function does, and then what it returns (as this can't be seen from the signature). Good documentation would be

- -
-
-
-
-
-
In [ ]:
-
-
-
def multiply(a, b):
-    """Calculates the element-wise multiplication of a and b, returning a list of the results"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(multiply)
-
- -
-
-
- -
-
-
-
-
-
-

Your documentation can span over multiple lines. If you are describing the arguments, then you should use one line per argument, for example

- -
-
-
-
-
-
In [ ]:
-
-
-
def make_complex(real, imag=0):
-    """Create and return a complex number
-    
-       Keyword arguments:
-       
-       real -- the real part of the number
-       imag -- the imaginary part of the number
-    """
-    return (real,imag)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(make_complex)
-
- -
-
-
- -
-
-
-
-
-
-

By convention, you will notice above that the last """ is placed on its own line if the documentation spans multiple lines. It is on the same line if the documentation is short.

-

In general, keep your documentation short, to the point, and avoid repeating obvious information. However, be precise, as this may be the only part of your code that somebody else reads before they use your function in their program.

-

A good suggestion is to look at documentation you like and try to copy that style. Also, look for code that you think is poorly documented, and try to avoid their mistakes.

- -
-
-
-
-
-
-
-
-

Exercise

Below is a series of undocumented functions. Take a look through the functions and try to work out what they do. Once you understand the functions, write some documentation for each function. Get your neighbour to read your documentation. Do they understand what the function does based on what you have written? Do the function names, combined with your documentation, accurately convey the result of calling the function?

-

Note that you may have to use help(...) yourself if you don't recognise some of the code in the functions. Also try to play with the function to see how it behaves.

- -
-
-
-
-
-
In [ ]:
-
-
-
def add(a, b):
-    c = []
-    for x,y in zip(a,b):
-        c.append(x+y)
-    return c
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
def subtract(a, b):
-    c = []
-    for x,y in zip(a,b):
-        c.append(x / y)
-    return c
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
def capitalise(message):
-    words = message.split(" ")
-    for i in range(0,len(words)):
-        words[i] = "%s%s" % (words[i][0].upper(), words[i][1:])
-    return " ".join(words)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
def surprise(x):
-    import random
-    if x < random.random():
-        print("Surprise!")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

For this last function, try calling it via list_interface("ipynb").

- -
-
-
-
-
-
In [ ]:
-
-
-
def list_interface(x):
-    import glob
-    f = glob.glob("*.%s" % x)
-    l = []
-    for x in f:
-        if x.startswith("0"):
-            l.append(x)
-    return l
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/08_class_documentation.html b/html/08_class_documentation.html deleted file mode 100644 index a8ae35b..0000000 --- a/html/08_class_documentation.html +++ /dev/null @@ -1,12228 +0,0 @@ - - - -08_class_documentation - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Documenting Classes

It is almost as easy to document a class as it is to document a function. Simply add docstrings to all of the class's functions, and also below the class name itself. For example, here is a simple documented class

- -
-
-
-
-
-
In [ ]:
-
-
-
class Demo:
-    """This class demonstrates how to document a class.
-    
-       This class is just a demonstration, and does nothing.
-       
-       However the principles of documentation are still valid!
-    """
-    
-    def __init__(self, name):
-        """You should document the constructor, saying what it expects to 
-           create a valid class. In this case
-           
-           name -- the name of an object of this class
-        """
-        self._name = name
-    
-    def getName(self):
-        """You should then document all of the member functions, just as
-           you do for normal functions. In this case, returns
-           the name of the object
-        """
-        return self._name
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
d = Demo("cat")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(d)
-
- -
-
-
- -
-
-
-
-
-
-

Often, when you write a class, you want to hide member data or member functions so that they are only visible within an object of the class. For example, above, the self._name member data should be hidden, as it should only be used by the object.

-

You control the visibility of member functions or member data using an underscore. If the member function or member data name starts with an underscore, then it is hidden. Otherwise, the member data or function is visible.

-

For example, we can hide the getName function by renaming it to _getName

- -
-
-
-
-
-
In [ ]:
-
-
-
class Demo:
-    """This class demonstrates how to document a class.
-    
-       This class is just a demonstration, and does nothing.
-       
-       However the principles of documentation are still valid!
-    """
-    
-    def __init__(self, name):
-        """You should document the constructor, saying what it expects to 
-           create a valid class. In this case
-           
-           name -- the name of an object of this class
-        """
-        self._name = name
-    
-    def _getName(self):
-        """You should then document all of the member functions, just as
-           you do for normal functions. In this case, returns
-           the name of the object
-        """
-        return self._name
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
d = Demo("cat")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
help(d)
-
- -
-
-
- -
-
-
-
-
-
-

Member functions or data that are hidden are called "private". Member functions or data that are visible are called "public". You should document all public member functions of a class, as these are visible and designed to be used by other people. It is helpful, although not required, to document all of the private member functions of a class, as these will only really be called by you. However, in years to come, you will thank yourself if you still documented them... ;-)

-

While it is possible to make member data public, it is not advised. It is much better to get and set values of member data using public member functions. This makes it easier for you to add checks to ensure that the data is consistent and being used in the right way. For example, compare these two classes that represent a person, and hold their height.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Person1:
-    """Class that holds a person's height"""
-    def __init__(self):
-        """Construct a person who has zero height"""
-        self.height = 0
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
class Person2:
-    """Class that holds a person's height"""
-    def __init__(self):
-        """Construct a person who has zero height"""
-        self._height = 0
-    
-    def setHeight(self, height):
-        """Set the person's height to 'height', returning whether or 
-           not the height was set successfully
-        """
-        if height < 0 or height > 300:
-            print("This is an invalid height! %s" % height)
-            return False
-        else:
-            self._height = height
-            return True
-        
-    def getHeight(self):
-        """Return the person's height"""
-        return self._height
-
- -
-
-
- -
-
-
-
-
-
-

The first example is quicker to write, but it does little to protect itself against a user who attempts to use the class badly.

- -
-
-
-
-
-
In [ ]:
-
-
-
p = Person1()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.height = -50
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.height
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.height = "cat"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.height
-
- -
-
-
- -
-
-
-
-
-
-

The second example takes more lines of code, but these lines are valuable as they check that the user is using the class correctly. These checks, when combined with good documentation, ensure that your classes can be safely used by others, and that incorrect use will not create difficult-to-find bugs.

- -
-
-
-
-
-
In [ ]:
-
-
-
p = Person2()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.setHeight(-50)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.getHeight()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.setHeight("cat")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.getHeight()
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Below is the completed GuessGame class from the previous lesson. Add documentation to this class.

- -
-
-
-
-
-
In [ ]:
-
-
-
class GuessGame:
-    def __init__(self, secret, max_guesses=5):
-        self._secret = secret
-        self._nguesses = 0
-        self._max_guesses = max_guesses
-    
-    def guess(self, value):
-        if (self.nGuesses() >= self.maxGuesses()):
-            print("Sorry, you have run out of guesses")
-        elif (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            self._nguesses += 1
-            print("Try again...")
-    
-    def nGuesses(self):
-        return self._nguesses
-    
-    def maxGuesses(self):
-        return self._max_guesses
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Below is a poorly-written class that uses public member data to store the name and age of a Person. Edit this class so that the member data is made private. Add get and set functions that allow you to safely get and set the name and age.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Person:
-    """Class the represents a Person, holding their name and age"""
-    def __init__(self, name="unknown", age=0):
-        """Construct a person with unknown name and an age of 0"""
-        self.name = name
-        self.age = age
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

Add a private member function called _splitName to your Person class that breaks the name into a surname and first name. Add new functions called getFirstName and getSurname that use this function to return the first name and surname of the person.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/09_exceptions.html b/html/09_exceptions.html deleted file mode 100644 index 56bd510..0000000 --- a/html/09_exceptions.html +++ /dev/null @@ -1,12116 +0,0 @@ - - - -09_exceptions - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Exceptions

Mistakes and errors happen in computer programs as much as in real life. Like life, how you handle an error in your program shows your level of professionalism, and gives others evidence that they can trust that you have written a program that will work well.

-

In the last section we indicated errors in the Person.setHeight function by printing a message to the screen and returning False to indicate that the call to setHeight had failed.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self):
-        """Construct a person who has zero height"""
-        self._height = 0
-    
-    def setHeight(self, height):
-        """Set the person's height to 'height', returning whether or 
-           not the height was set successfully
-        """
-        if height < 0 or height > 300:
-            print("This is an invalid height! %s" % height)
-            return False
-        else:
-            self._height = height
-            return True
-        
-    def getHeight(self):
-        """Return the person's height"""
-        return self._height
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p = Person()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
p.setHeight(-20)
-
- -
-
-
- -
-
-
-
-
-
-

This is not a good way of indicating an error. The issues with this are;

-
    -
  • How does the person calling setHeight know to check whether the call returns True or False?
  • -
  • What if we wanted to return something else? Should we return the error state and the value we want together?
  • -
  • If the error state is not checked, and nobody reads the error message printed to the screen, then the program is broken, as the person has been created with a height of 0.
  • -
-

The solution is to send something to the programmer that they cannot ignore, which indicates that there is an error. That something is called an "exception".

-

Take a look at this simple code that sets the height...

- -
-
-
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 300:
-        raise ValueError("Invalid height: %s. This should be between 0 and 300" % height)
-        
-    print("Height is set to %s" % height)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
setHeight(-5)
-
- -
-
-
- -
-
-
-
-
-
-

When we try to use an invalid value for the height, we raise (or throw) a ValueError exception. This stops the function from continuing, and gives us a very helpful print out of what went wrong, and where.

-

ValueError is just a class. The name of the class provides us with useful information (there was an error with a value in the program). You choose what error you want to raise. Python provides a set of usefully named error classes that you can use:

-
    -
  • IOError : Error raised when you have a problem with IO, e.g. opening or closing files
  • -
  • ZeroDivisionError : Error raised when you divide by zero
  • -
  • TypeError : Error raised when you are using the wrong type, e.g. maybe setting the height to a string
  • -
  • IndexError : Error raised when you are using an invalid index to access a list or other similar container
  • -
  • KeyError : Error raised when you are using an invalid key to access a dictionary or other similar container
  • -
-

A full list of standard Python exceptions is available here.

-

You are free to raise any exception class you want. It is your job as a programmer to choose the one that is most sensible, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 300:
-        raise ZeroDivisionError("Invalid height: %s. This should be between 0 and 300" % height)
-        
-    print("Height is set to %s" % height)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
setHeight(400)
-
- -
-
-
- -
-
-
-
-
-
-

Using a ZeroDivisionError is a bad choice, as the error has nothing to do with division by zero. A ValueError is the right choice as the error relates to an invalid value passed to the function.

-

You are free to create your own exception classes.

- -
-
-
-
-
-
In [ ]:
-
-
-
class InvalidHeightError(Exception):
-    pass
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 300:
-        raise InvalidHeightError("Invalid height: %s. This should be between 0 and 300" % height)
-        
-    print("Height is set to %s" % height)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
setHeight(-10)
-
- -
-
-
- -
-
-
-
-
-
-

Your own exception classes must be declared as derived from type Exception, hence why you have to write class InvalidHeightError(Exception):. As the class doesn't need to do anything else, you can use pass to say that nothing else needs to be added. Note that you can call your error class anything you want. By convention, it is good to end the class name with Error so that other programmers know what it is for.

- -
-
-
-
-
-
-
-
-

Exercise

Here is an extended copy of the Person code from above.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self, height=0, weight=0):
-        """Construct a person with the specified name, height and weight"""
-        self.setHeight(height)
-        self.setWeight(weight)
-    
-    def setHeight(self, height):
-        """Set the person's height in meters"""
-        self._height = height
-    
-    def setWeight(self, weight):
-        """Set the person's weight in kilograms"""
-        self._weight = weight
-        
-    def getHeight(self):
-        """Return the person's height in meters"""
-        return self._height
-    
-    def getWeight(self):
-        """Return the person's weight in kilograms"""
-        return self._weight
-    
-    def bmi(self):
-        """Return the person's body mass index (bmi)"""
-        return self.getWeight() / self.getHeight()**2
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

Edit the above copy of Person to ensure that the .setWeight function only accepts valid weights. A valid weight is any number that is between 0 and 500 kilograms. You should raise a ValueError if the weight is outside this range. For the moment, do not worry about the user supplying a non-numeric weight.

-

Also edit the above copy of Person to ensure that the .setHeight function only accepts valid heights. A valid height is any number that is between 0 and 2.5 meters. You should raise a ValueError if the height is outside this range. For the moment, do not worry about the user supplying a non-numeric height.

-

Check that a ValueError exception is correctly raised if invalid heights or weights are supplied. Also check that the ValueError exception is not raised if a valid height and weight are supplied.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

If you run the following code;

-
p = Person()
-p.bmi()
-
-

it will raise a ZeroDivisionError exception. This is because the calculation involves dividing by the height squared, which is zero in a default-constructed Person. While an exception has been raised, it is not very intuitive for another programmer to debug. A solution is to create your own named exception that provides more information.

-

Create a new exception called NullPersonError, and edit the .bmi() function so that this exception is raised if it is called on a Person whose height or weight is zero.

-

Check that the NullPersonError exception is raised if .bmi() is called on a default-constructed Person. Check that this exception is not raised if .bmi() is called on a properly constructed Person.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/10_error_handling.html b/html/10_error_handling.html deleted file mode 100644 index 0c57c54..0000000 --- a/html/10_error_handling.html +++ /dev/null @@ -1,12130 +0,0 @@ - - - -10_error_handling - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Error Handling

Exceptions are useful for more than just signalling errors. They can also be used to help you handle the error, and potentially even fix the problem (a true self-healing program!).

-

Consider this cut down version of the .setHeight function from the last session...

- -
-
-
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
-
-
-

The code currently correctly detects if the user supplies a height that is below 0 or above 2.5. However, what about when the user tries to set the height to something that is not a number?

- -
-
-
-
-
-
In [ ]:
-
-
-
setHeight("cat")
-
- -
-
-
- -
-
-
-
-
-
-

We get a weird error message that says we have a TypeError, as you cannot order a string and an integer.

-

One way to address this is to ask that height is converted to a float, using height = float(height)

- -
-
-
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    height = float(height)
-    
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
-
-
-

However, this hasn't made the error any easier to understand, as we now get a ValueError raised...

- -
-
-
-
-
-
In [ ]:
-
-
-
setHeight("cat")
-
- -
-
-
- -
-
-
-
-
-
-

The solution is for us to handle the exception, using a try...except block

- -
-
-
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    try:
-        height = float(height)
-    except:
-        raise TypeError("Invalid height: '%s'. You can only set the height to a numeric value" % height)
-    
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
setHeight("cat")
-
- -
-
-
- -
-
-
-
-
-
-

What's happened here? The try: line starts a try-block. The code that is in the try-block is run. If any of this code raises an exception, then execution stops in the try-block, and switches instead to the code in the except-block (everything within the except: block). In our case, float(height) raised an exception, so execution jumped to the except-block, in which we ran the raise TypeError(...) code.

-

Now the error is much more informative, allowing the user to better understand what has gone wrong. However, exception handling can do more than this. It can allow you to fix the problem. Consider this example...

- -
-
-
-
-
-
In [ ]:
-
-
-
setHeight("1.8 m")
-
- -
-
-
- -
-
-
-
-
-
-

We as humans can see that this could be an acceptable input. However, the computer needs help to understand. We can add code to the except-block that can try to resolve the problem. For example, imagine we had a function that could interpret heights from strings...

- -
-
-
-
-
-
In [ ]:
-
-
-
def string_to_height(height):
-    """This function tries to interpret the passed argument as a height 
-       in meters. The format should be 'X m', 'X meter' or 'X meters',
-       where 'X' is a number
-    """
-    # convert height to a string - this always works
-    height = str(height)
-        
-    words = height.split(" ")
-            
-    if len(words) == 2:
-        if words[1] == "m" or words[1] == "meter" or words[1] == "meters":
-            try:
-                return float(words[0])
-            except:
-                pass
-    
-    # Getting here means that we haven't been able to extract a valid height
-    raise TypeError("Cannot extract a valid height from '%s'" % height)
-
- -
-
-
- -
-
-
-
-
-
-

We can now call this function from within the except-block of setHeight

- -
-
-
-
-
-
In [ ]:
-
-
-
def setHeight(height):
-    try:
-        height = float(height)
-    except:
-        height = string_to_height(height)
-    
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
setHeight("1.8 m")
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Here is a copy of the Person class from the last session. Edit the setHeight function so that it uses exception handling and the string_to_height function to correctly interpret heights such as "1.8 m", and so that it gives a useful error message if it is given something weird. Check that the function correctly responds to a range of valid and invalid inputs.

- -
-
-
-
-
-
In [ ]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self, height=0, weight=0):
-        """Construct a person with the specified name, height and weight"""
-        self.setHeight(height)
-        self.setWeight(weight)
-    
-    def setHeight(self, height):
-        """Set the person's height in meters"""
-        if height < 0 or height > 2.5:
-            raise ValueError("Invalid height: %s. This shoud be between 0 and 2.5 meters" % height)
-        self._height = height
-    
-    def setWeight(self, weight):
-        """Set the person's weight in kilograms"""
-        if weight < 0 or weight > 500:
-            raise ValueError("Invalid weight: %s. This should be between 0 and 500 kilograms" % weight)
-        self._weight = weight
-        
-    def getHeight(self):
-        """Return the person's height in meters"""
-        return self._height
-    
-    def getWeight(self):
-        """Return the person's weight in kilograms"""
-        return self._weight
-    
-    def bmi(self):
-        """Return the person's body mass index (bmi)"""
-        if (self.getHeight() == 0 or self.getWeight() == 0):
-            raise NullPersonError("Cannot calculate the BMI of a person with zero "
-                                  "height or weight (%s,%s)" % (self.getHeight(),self.getWeight()))
-            
-        return self.getWeight() / self.getHeight()**2
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Create a string_to_weight function that interprets weights in kilograms (e.g. "5 kg", "5 kilos" or "5 kilograms"). Now edit the Person.setWeight function so that it uses exception handling and string_to_weight to correctly interpret weights such as "35.5 kg" and gives a useful error message if it is given something weird. Check that your function responds correctly to a range of valid and invalid inputs.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/11_modules.html b/html/11_modules.html deleted file mode 100644 index f331e89..0000000 --- a/html/11_modules.html +++ /dev/null @@ -1,12151 +0,0 @@ - - - -11_modules - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Modules

You can turn any Python script that you write into a module that other people can import and use in their own code.

-

For example;

- -
-
-
-
-
-
In [ ]:
-
-
-
import superhero
-
- -
-
-
- -
-
-
-
-
-
-

What has happened here???

-

There is a file in your current directory called superhero.py. The line import superhero will look in the current directory, to find a file called superhero.py. It then runs this file, just as if you had typed it into the screen.

- -
-
-
-
-
-
-
-
-

This is just a simple Python script, which we can print out using

- -
-
-
-
-
-
In [ ]:
-
-
-
! cat superhero.py
-
- -
-
-
- -
-
-
-
-
-
-

We can get help on the module using help

- -
-
-
-
-
-
In [ ]:
-
-
-
help(superhero)
-
- -
-
-
- -
-
-
-
-
-
-

This documentation comes from the class and function documentation put into the file.

-

You can also use the data, classes and functions in the file, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
ironman = superhero.Superhero(name="Iron Man", weakness="rust")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
superhero.battle(ironman, superhero.lex)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
superhero.lex.steal("rust")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
superhero.battle(ironman, superhero.lex)
-
- -
-
-
- -
-
-
-
-
-
-

One thing to note is that all of the classes, functions and data in the script have been imported into their own namespace, named after the script (e.g. superhero.). We can import the file and put all names into the current namespace using

- -
-
-
-
-
-
In [ ]:
-
-
-
from superhero import *
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
battle(ironman, lex)
-
- -
-
-
- -
-
-
-
-
-
-

While any python script can be imported as a module, there are a few conventions you should follow that will make your module easier for others to use.

-
    -
  • Add documentation to the module. As you can see, there is a docstring at the top of superhero.py, which is the first thing written out by help(). This should provide an overview of the module.
  • -
  • Avoid actually running any code or creating any variables. The current superhero.py is bad as it does this, which is why you see "Is it a bird..." printed when you import it!
  • -
-

The way to avoid creating any variables or running code is to let the script detect when it is being imported, and to not create any variables if that is the case.

-

You can detect if your Python script is not being imported using

-
if __name__ == "__main__":
-    print("I am not being imported.")
-
- -
-
-
-
-
-
In [ ]:
-
-
-
if __name__ == "__main__":
-    print("I am not being imported")
-
- -
-
-
- -
-
-
-
-
-
-

To show how this works, there is a superhero2.py script, which is identical to superhero.py, except all code that should not be run on import is hidden inside the if __name__ == "__main__": block.

- -
-
-
-
-
-
In [ ]:
-
-
-
! cat superhero2.py
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
import superhero2
-
- -
-
-
- -
-
-
-
-
-
-

By using if __name__ == "__main__": we have prevented superhero2.py from printing anything out when it is imported, and have also prevented it from creating the variables lex and superman.

-

You can see this by running the superhero2.py script directly, e.g. using

- -
! python superhero2.py
- -
-
-
-
-
-
In [ ]:
-
-
-
! python superhero2.py
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Use the "New Text File" option in the Jupyter Home to create a new python text file called morse.py. Copy the below class into this file.

-
class Morse:
-    def __init__(self):
-        self._letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
-        self._morse_to_letter = {}
-        for letter in self._letter_to_morse.keys():
-            self._morse_to_letter[ self._letter_to_morse[letter] ] = letter
-
-    def encode(self, message):
-        morse = []
-        for letter in message:
-            morse.append( self._letter_to_morse[letter.lower()] )
-        return morse
-
-    def decode(self, morse):
-        message = []
-        for code in morse:
-            message.append( self._morse_to_letter[code] )
-        return "".join(message)
-
-

Add documentation to this class, and to the module. Next, import the module and get help using the commands

-
import morse
-help(morse)
-
-

Does your documentation make sense?

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Create some checks of your module that should not be run when the module is imported (i.e. only run directly). The checks should be, e.g.

-
morse = Morse()
-
-    for message in ["Hello world", "something to encode", "test message"]:
-        test = morse.decode( morse.encode(message) )
-
-        if message.lower() == test: 
-            print("Success: %s" % message)
-        else:
-            print("Failed: %s" % message)
-
-

Validate that the check doesn't run on import using

-
import morse
-
-

Validate that the check runs from the command line using

- -
! python morse.py
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/12_pandas.html b/html/12_pandas.html deleted file mode 100644 index 39fd863..0000000 --- a/html/12_pandas.html +++ /dev/null @@ -1,12724 +0,0 @@ - - - -12_pandas - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Pandas

Pandas is a library providing high-performance, easy-to-use data structures and data analysis tools. The core of pandas is its dataframe which is essentially a table of data. Pandas provides easy and powerful ways to import data from a variety of sources and export it to just as many. It is also explicitly designed to handle missing data elegantly which is a very common problem in data from the real world.

-

The official pandas documentation is very comprehensive and you will be able to answer a lot of questions in there; however, it can sometimes be hard to find the right page. Don't be afraid to use Google to find help.

- -
-
-
-
-
-
-
-
-

Pandas has a standard convention for importing it which you will see used in a lot of documentation so we will follow that in this course:

- -
-
-
-
-
-
In [ ]:
-
-
-
import pandas as pd
-from pandas import Series, DataFrame
-
- -
-
-
- -
-
-
-
-
-
-

Series

The simplest of pandas' data structures is the Series. It is a one-dimensional list-like structure. -Let's create one from a list:

- -
-
-
-
-
-
In [ ]:
-
-
-
Series([14, 7, 3, -7, 8])
-
- -
-
-
- -
-
-
-
-
-
-

There are three main components to this output. -The first column (0, 1, etc.) is the index; by default this numbers each row starting from zero. -The second column is our data, stored in the same order we entered it in our list. -Finally at the bottom there is the dtype which stands for 'data type' which is telling us that all our data is being stored as a 64-bit integer. -Usually you can ignore the dtype until you start doing more advanced things.

-

In the first example above we allowed pandas to automatically create an index for our Series (this is the 0, 1, 2, etc. in the left column) but often you will want to specify one yourself

- -
-
-
-
-
-
In [ ]:
-
-
-
s = Series([14, 7, 3, -7, 8], index=['a', 'b', 'c', 'd', 'e'])
-print(s)
-
- -
-
-
- -
-
-
-
-
-
-

We can use this index to retrieve individual rows

- -
-
-
-
-
-
In [ ]:
-
-
-
s['a']
-
- -
-
-
- -
-
-
-
-
-
-

to replace values in the series

- -
-
-
-
-
-
In [ ]:
-
-
-
s['c'] = -1
-
- -
-
-
- -
-
-
-
-
-
-

or to get a set of rows

- -
-
-
-
-
-
In [ ]:
-
-
-
s[['a', 'c', 'd']]
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

    -
  • Create a Pandas Series with 10 or so elements where the indices are years and the values are numbers.
  • -
  • Experiment with retrieving elements from the Series.
  • -
  • Try making another Series with duplicate values in the index, what happens when you access those elements?
  • -
  • How does a Pandas Series differ from a Python list or dict?
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Series operations

A Series is list-like in the sense that it is an ordered set of values. It is also dict-like since its entries can be accessed via key lookup. One very important way in which it differs is how it allows operations to be done over the whole Series in one go, a technique often referred to as 'broadcasting'.

-

A simple example is wanting to double the value of every entry in a set of data. In standard Python, you might have a list like

- -
-
-
-
-
-
In [ ]:
-
-
-
my_list = [3, 6, 8, 4, 10]
-
- -
-
-
- -
-
-
-
-
-
-

If you wanted to double every entry you might try simply multiplying the list by 2:

- -
-
-
-
-
-
In [ ]:
-
-
-
my_list * 2
-
- -
-
-
- -
-
-
-
-
-
-

but as you can see, that simply duplicated the elements. Instead you would have to use a for loop or a list comprehension:

- -
-
-
-
-
-
In [ ]:
-
-
-
[i * 2 for i in my_list]
-
- -
-
-
- -
-
-
-
-
-
-

With a pandas Series, however, you can perform bulk mathematical operations to the whole series in one go:

- -
-
-
-
-
-
In [ ]:
-
-
-
my_series = Series(my_list)
-print(my_series)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
my_series * 2
-
- -
-
-
- -
-
-
-
-
-
-

As well as bulk modifications, you can perform bulk selections by putting more complex statements in the square brackets:

- -
-
-
-
-
-
In [ ]:
-
-
-
s[s < 0]  # All negative entries
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
s[(s * 2) > 4]  # All entries which, when doubled are greater than 4
-
- -
-
-
- -
-
-
-
-
-
-

These operations work because the Series index selection can be passed a series of True and False values which it then uses to filter the result:

- -
-
-
-
-
-
In [ ]:
-
-
-
(s * 2) > 4
-
- -
-
-
- -
-
-
-
-
-
-

Here you can see that the rows a, b and e are True while the others are False. Passing this to s[...] will only show rows that are True.

- -
-
-
-
-
-
-
-
-

Multi-Series operations

It is also possible to perform operations between two Series objects:

- -
-
-
-
-
-
In [ ]:
-
-
-
s2 = Series([23,5,34,7,5])
-s3 = Series([7, 6, 5,4,3])
-s2 - s3
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

    -
  • Create two Series objects of equal length with no specified index and containing any values you like. Perform some mathematical operations on them and experiment to make sure it works how you think.
  • -
  • What happens when you perform an operation on two series which have different lengths? How does this change when you give the series some indices?
  • -
  • Using the Series from the first exercise with the years for the index, Select all entries with even-numbered years. Also, select all those with odd-numbered years.
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

DataFrame

While you can think of the Series as a one-dimensional list of data, pandas' DataFrame is a two (or possibly more) dimensional table of data. You can think of each column in the table as being a Series.

- -
-
-
-
-
-
In [ ]:
-
-
-
data = {'city': ['Paris', 'Paris', 'Paris', 'Paris',
-                 'London', 'London', 'London', 'London',
-                 'Rome', 'Rome', 'Rome', 'Rome'],
-        'year': [2001, 2008, 2009, 2010,
-                 2001, 2006, 2011, 2015,
-                 2001, 2006, 2009, 2012],
-        'pop': [2.148, 2.211, 2.234, 2.244,
-                7.322, 7.657, 8.174, 8.615,
-                2.547, 2.627, 2.734, 2.627]}
-df = DataFrame(data)
-
- -
-
-
- -
-
-
-
-
-
-

This has created a DataFrame from the dictionary data. The keys will become the column headers and the values will be the values in each column. As with the Series, an index will be created automatically.

- -
-
-
-
-
-
In [ ]:
-
-
-
df
-
- -
-
-
- -
-
-
-
-
-
-

Or, if you just want a peek at the data, you can just grab the first few rows with:

- -
-
-
-
-
-
In [ ]:
-
-
-
df.head(3)
-
- -
-
-
- -
-
-
-
-
-
-

Since we passed in a dictionary to the DataFrame constructor, the order of the columns will not necessarily match the order in which you defined them. To enforce a certain order, you can pass a columns argument to the constructor giving a list of the columns in the order you want them:

- -
-
-
-
-
-
In [ ]:
-
-
-
DataFrame(data, columns=['year', 'city', 'pop'])
-
- -
-
-
- -
-
-
-
-
-
-

When we accessed elements from a Series object, it would select an element by row. However, by default DataFrames index primarily by column. You can access any column directly by using square brackets or by named attributes:

- -
-
-
-
-
-
In [ ]:
-
-
-
df['year']
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
df.city
-
- -
-
-
- -
-
-
-
-
-
-

Accessing a column like this returns a Series which will act in the same way as those we were using earlier.

-

Note that there is one additional part to this output, Name: city. Pandas has remembered that this Series was created from the 'city' column in the DataFrame.

- -
-
-
-
-
-
In [ ]:
-
-
-
type(df.city)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
df.city == 'Paris'
-
- -
-
-
- -
-
-
-
-
-
-

This has created a new Series which has True set where the city is Paris and False elsewhere.

-

We can use filtered Series like this to filter the DataFrame as a whole. df.city == 'Paris' has returned a Series containing booleans. Passing it back into df as an indexing operation will use it to filter based on the 'city' column.

- -
-
-
-
-
-
In [ ]:
-
-
-
df[df.city == 'Paris']
-
- -
-
-
- -
-
-
-
-
-
-

You can then carry on and grab another column after that filter:

- -
-
-
-
-
-
In [ ]:
-
-
-
df[df.city == 'Paris'].year
-
- -
-
-
- -
-
-
-
-
-
-

If you want to select a row from a DataFrame then you can use the .loc attribute which allows you to pass index values like:

- -
-
-
-
-
-
In [ ]:
-
-
-
df.loc[2]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
df.loc[2]['city']
-
- -
-
-
- -
-
-
-
-
-
-

Adding new columns

New columns can be added to a DataFrame simply by assigning them by index (as you would for a Python dict) and can be deleted with the del keyword in the same way:

- -
-
-
-
-
-
In [ ]:
-
-
-
df['continental'] = df.city != 'London'
-df
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
del df['continental']
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

    -
  • Create the DataFrame containing the census data for the three cities.
  • -
  • Select the data for the year 2001. Which city had the smallest population that year?
  • -
  • Find all the cities which had a population smaller than 2.6 million.
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Reading from file

One of the most common situations is that you have some data file containing the data you want to read. Perhaps this is data you've produced yourself or maybe it's from a colleague. In an ideal world the file will be perfectly formatted and will be trivial to import into pandas, but since this is so often not the case, pandas provides a number of features to make your life easier.

-

Full information on reading and writing is available in the pandas manual on IO tools but first it's worth noting the common formats that pandas can work with:

-
    -
  • Comma separated tables (or tab-separated or space-separated etc.)
  • -
  • Excel spreadsheets
  • -
  • HDF5 files
  • -
  • SQL databases
  • -
-

For this course we will focus on plain-text CSV files as they are perhaps the most common format. Imagine we have a CSV file like (you can download this file from city_pop.csv):

- -
-
-
-
-
-
In [ ]:
-
-
-
! cat data/city_pop.csv  # Uses the IPython 'magic' !cat to print the file
-
- -
-
-
- -
-
-
-
-
-
-

We can use the pandas function read_csv() to read the file and convert it to a DataFrame. Full documentation for this function can be found in the manual or, as with any Python object, directly in the notebook by putting a ? after the name:

- -
-
-
-
-
-
In [ ]:
-
-
-
help(pd.read_csv)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
pd.read_csv('data/city_pop.csv')
-
- -
-
-
- -
-
-
-
-
-
-

We can see that by default it's done a fairly bad job of parsing the file (this is mostly because I've constructed the city_pop.csv file to be as obtuse as possible). It's making a lot of assumptions about the structure of the file but in general it's taking quite a naïve approach.

-

The first thing we notice is that it's treating the text at the top of the file as though it's data. Checking the documentation we see that the simplest way to solve this is to use the skiprows argument to the function, to which we give an integer giving the number of rows to skip:

- -
-
-
-
-
-
In [ ]:
-
-
-
pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-)
-
- -
-
-
- -
-
-
-
-
-
-

The next most obvious problem is that it is not separating the columns at all. This is controlled by the sep argument which is set to ',' by default (hence comma separated values). We can simply set it to the appropriate semi-colon:

- -
-
-
-
-
-
In [ ]:
-
-
-
pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-    sep=';'
-)
-
- -
-
-
- -
-
-
-
-
-
-

Reading the descriptive header of our data file we see that a value of -1 signifies a missing reading so we should mark those too. This can be done after the fact but it is simplest to do it at import-time using the na_values argument:

- -
-
-
-
-
-
In [ ]:
-
-
-
pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-    sep=';',
-    na_values='-1'
-)
-
- -
-
-
- -
-
-
-
-
-
-

The last thing we want to do is use the year column as the index for the DataFrame. This can be done by passing the name of the column to the index_col argument:

- -
-
-
-
-
-
In [ ]:
-
-
-
df3 = pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-    sep=';',
-    na_values='-1',
-    index_col='year'
-)
-df3
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 4

    -
  • Alongside data/city_pop.csv there is another file called data/cetml1659on.dat. This contains some historical weather data for a location in the UK. Import that file as a Pandas DataFrame using read_csv(), making sure that you cover all the NaN values.
  • -
  • How many years had a negative average temperature in January?
  • -
  • What was the average temperature in June over the years in the data set? Tip: look in the documentation for which method to call.
  • -
-

We will come back to this data set in a later stage.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/13_basic_numpy.html b/html/13_basic_numpy.html deleted file mode 100644 index 545a4a5..0000000 --- a/html/13_basic_numpy.html +++ /dev/null @@ -1,12289 +0,0 @@ - - - -13_basic_numpy - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Basic NumPy

NumPy ('Numerical Python') is the de facto standard module for doing numerical work in Python. Its main feature is its array data type which allows very compact and efficient storage of homogeneous (of the same type) data.

-

A lot of the material in this section is based on SciPy Lecture Notes (CC-by 4.0).

-

As you go through this material, you'll likely find it useful to refer to the NumPy documentation, particularly the array objects section.

-

As with pandas there is a standard convention for importing numpy, and that is as np:

- -
-
-
-
-
-
In [ ]:
-
-
-
import numpy as np
-
- -
-
-
- -
-
-
-
-
-
-

Now that we have access to the numpy package we can start using its features.

-

Creating arrays

In many ways a NumPy array can be treated like a standard Python list and much of the way you interact with it is identical. Given a list, you can create an array as follows:

- -
-
-
-
-
-
In [ ]:
-
-
-
python_list = [1, 2, 3, 4, 5, 6, 7, 8]
-numpy_array = np.array(python_list)
-print(numpy_array)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
# ndim give the number of dimensions
-numpy_array.ndim
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
# the shape of an array is a tuple of its length in each dimension. In this case it is only 1-dimensional
-numpy_array.shape
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
# as in standard Python, len() gives a sensible answer
-len(numpy_array)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
nested_list = [[1, 2, 3], [4, 5, 6]]
-two_dim_array = np.array(nested_list)
-print(two_dim_array)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
two_dim_array.ndim
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
two_dim_array.shape
-
- -
-
-
- -
-
-
-
-
-
-

It's very common when working with data to not have it already in a Python list but rather to want to create some data from scratch. numpy comes with a whole suite of functions for creating arrays. We will now run through some of the most commonly used.

- -
-
-
-
-
-
-
-
-

The first is np.arange (meaning "array range") which works in a very similar fashion to the standard Python range() function, including how it defaults to starting from zero, doesn't include the number at the top of the range and how it allows you to specify a 'step':

- -
-
-
-
-
-
In [ ]:
-
-
-
np.arange(10) #0 .. n-1  (!)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
np.arange(1, 9, 2) # start, end (exclusive), step
-
- -
-
-
- -
-
-
-
-
-
-

Next up is the np.linspace (meaning "linear space") which generates evenly spaced floating point numbers starting from the first argument up to the second argument. The third argument defines how many numbers to create:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.linspace(0, 1, 6)   # start, end, num-points
-
- -
-
-
- -
-
-
-
-
-
-

Note how it included the end point unlike arange(). You can change this feature by using the endpoint argument:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.linspace(0, 1, 5, endpoint=False)
-
- -
-
-
- -
-
-
-
-
-
-

np.ones creates an n-dimensional array filled with the value 1.0. The argument you give to the function defines the shape of the array:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.ones((3, 3))  # reminder: (3, 3) is a tuple
-
- -
-
-
- -
-
-
-
-
-
-

Likewise, you can create an array of any size filled with zeros:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.zeros((2, 2))
-
- -
-
-
- -
-
-
-
-
-
-

The np.eye (referring to the mathematical identity matrix, commonly labelled as I) creates a square matrix of a given size with 1.0 on the diagonal and 0.0 elsewhere:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.eye(3)
-
- -
-
-
- -
-
-
-
-
-
-

The np.diag creates a square matrix with the given values on the diagonal and 0.0 elsewhere:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.diag([1, 2, 3, 4])
-
- -
-
-
- -
-
-
-
-
-
-

Finally, you can fill an array with random numbers:

- -
-
-
-
-
-
In [ ]:
-
-
-
np.random.rand(4)  # uniform in [0, 1]
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
np.random.randn(4)  # Gaussian
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 5

    -
  • Experiment with arange, linspace, ones, zeros, eye and diag.
  • -
  • Create different kinds of arrays with random numbers.
  • -
  • Look at the function np.empty. What does it do? When might this be useful?
  • -
- -
-
-
-
-
-
-
-
-

Reshaping arrays

Behind the scenes, a multi-dimensional NumPy array is just stored as a linear segment of memory. The fact that it is presented as having more than one dimension is simply a layer on top of that (sometimes called a view). This means that we can simply change that interpretive layer and change the shape of an array very quickly (i.e without NumPy having to copy any data around).

-

This is mostly done with the reshape() method on the array object:

- -
-
-
-
-
-
In [ ]:
-
-
-
my_array = np.arange(16)
-my_array
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
my_array.shape
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
my_array.reshape((2, 8))
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
my_array.reshape((4, 4))
-
- -
-
-
- -
-
-
-
-
-
-

Note that if you check, my_array.shape will still return (16,) as reshaped is simply a view on the original data, it hasn't actually changed it. If you want to edit the original object in-place then you can use the resize() method.

-

You can also transpose an array using the transpose() method which mirrors the array along its diagonal:

- -
-
-
-
-
-
In [ ]:
-
-
-
my_array.reshape((2, 8)).transpose()
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
my_array.reshape((4,4)).transpose()
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 6

Using the NumPy documentation at https://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html, create, in one line, a NumPy array which looks like:

-
[10,  60,  20,  70,  30,  80,  40,  90,  50, 100]
-
-

Hint: you will need to use transpose(), reshape() and arange() as well as one new function from the "Shape manipulation" section of the documentation. Can you find a method which uses less than 4 function calls?

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
- -
-
-
-
- - - - - - diff --git a/html/14_more_numpy.html b/html/14_more_numpy.html deleted file mode 100644 index 22f3362..0000000 --- a/html/14_more_numpy.html +++ /dev/null @@ -1,12189 +0,0 @@ - - - -14_more_numpy - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

More Numpy

Carrying on from the last lesson we will continue learning how to manipulate data in numpy before using matplotlib to plot our data.

- -
-
-
-
-
-
In [ ]:
-
-
-
import numpy as np
-
- -
-
-
- -
-
-
-
-
-
-

Basic data types

You may have noticed that, in some instances, array elements are displayed with a trailing dot (e.g. 2. vs 2). This is due to a difference in the data-type used:

- -
-
-
-
-
-
In [ ]:
-
-
-
a = np.array([1, 2, 3])
-a.dtype
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
b = np.array([1., 2., 3.])
-b.dtype
-
- -
-
-
- -
-
-
-
-
-
-

Different data-types allow us to store data more compactly in memory, but most of the time we simply work with floating point numbers. Note that, in the example above, NumPy auto-detects the data-type from the input but you can specify it explicitly:

- -
-
-
-
-
-
In [ ]:
-
-
-
c = np.array([1, 2, 3], dtype=float)
-c.dtype
-
- -
-
-
- -
-
-
-
-
-
-

The default data type is floating point.

- -
-
-
-
-
-
In [ ]:
-
-
-
d = np.ones((3, 3))
-d.dtype
-
- -
-
-
- -
-
-
-
-
-
-

There are other data types as well:

- -
-
-
-
-
-
In [ ]:
-
-
-
e = np.array([1+2j, 3+4j, 5+6*1j])
-type(1j)
-#e.dtype
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
f = np.array([True, False, False, True])
-f.dtype
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
g = np.array(['Bonjour', 'Hello', 'Hallo',])
-g.dtype     # <--- strings containing max. 7 letters
-
- -
-
-
- -
-
-
-
-
-
-

We previously came across dtypes when learning about pandas. This is because pandas uses NumPy as its underlying library. A pandas.Series is essentially a np.array with some extra features wrapped around it.

- -
-
-
-
-
-
-
-
-

Exercise 1

Recreate some of the arrays we created in yesterday's session and look at what dtype they have.

- -
-
-
-
-
-
-
-
-

Why NumPy

To show some of the advantages of NumPy over a standard Python list, let's do some benchmarking. It's an important habit in programming that whenever you think one method may be faster than another, you check to see whether your assumption is true.

-

Python provides some tools to make this easier, particularly via the timeit module. Using this functionality, IPython provides a %timeit magic function to make our life easier. To use the %timeit magic, simply put it at the beginning of a line and it will give you information about how long it took to run. It doesn't always work as you would expect so to make your life easier, put whatever code you want to benchmark inside a function and time that function call.

-

We start by making a list and an array of 100000 items each of values counting from 0 to 99999:

- -
-
-
-
-
-
In [ ]:
-
-
-
python_list = list(range(100000))
-numpy_array = np.arange(100000)
-
- -
-
-
- -
-
-
-
-
-
-

We are going to go through each item in the list and double its value in-place, such that the list is changed after the operation. To do this with a Python list we need a for loop:

- -
-
-
-
-
-
In [ ]:
-
-
-
def python_double(a):
-    for i, val in enumerate(a):
-        a[i] = val * 2
-
-%timeit python_double(python_list)
-
- -
-
-
- -
-
-
-
-
-
-

To do the same operation in NumPy we can use the fact that multiplying a NumPy array by a value will apply that operation to each of its elements:

- -
-
-
-
-
-
In [ ]:
-
-
-
def numpy_double(a):
-    a *= 2
-
-%timeit numpy_double(numpy_array)
-
- -
-
-
- -
-
-
-
-
-
-

As you can see, the NumPy version is at least 10 times faster, sometimes up to 100 times faster.

-

Have a think about why this might be, what is NumPy doing to make this so much faster? There are two main parts to the answer.

- -
-
-
-
-
-
-
-
-

Copies and views

A slicing operation (like reshaping before) creates a view on the original array, which is just a way of accessing array data. Thus the original array is not copied in memory. This means you can do this to large arrays without any great performance hit. You can use np.may_share_memory() to check if two arrays share the same memory block. Note however, that this uses heuristics and may give you false positives.

-

When modifying the view, the original array is modified as well:

- -
-
-
-
-
-
In [ ]:
-
-
-
a = np.arange(10)
-a
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
b = a[3:7]
-
-np.may_share_memory(a, b)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
b[0] = 12
-b
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a   # (!)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
a = np.arange(10)
-c = a[::2].copy()  # force a copy
-c[0] = 12
-a
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
np.may_share_memory(a, c)  # we made a copy so there is no shared memory
-
- -
-
-
- -
-
-
-
-
-
-

Whether you make a view or a copy can affect the speed of your code significantly. Be in the habit of checking whether your code is doing unnecessary work. Also, be sure to benchmark your code as you work on it so that you notice any slowdowns and so that you know which parts are slow so you speed the right bits up.

- -
-
-
-
-
-
-
-
-

Exercise 2

    -
  • Using %timeit, time how long finding the square roots of a list of numbers would take under both standard Python and numpy.
      -
    • Tip: Python's square root function is math.sqrt. numpy's is np.sqrt.
    • -
    -
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/15_matplotlib.html b/html/15_matplotlib.html deleted file mode 100644 index 149951e..0000000 --- a/html/15_matplotlib.html +++ /dev/null @@ -1,12493 +0,0 @@ - - - -15_matplotlib - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Plotting data with matplotlib

Plotting of data in pandas is handled by an external Python module called matplotlib. Like pandas it is a large library and has a venerable history (first released in 2003) and so we couldn't hope to cover all its functionality in this course. To see the wide range of possibilities you have with matplotlib see its example gallery.

-

Here we will cover the basic uses of it and how it integrates with pandas. While working through these examples you will likely find it very useful to refer to the matplotlib documentation.

- -
-
-
-
-
-
-
-
-

First we import pandas and numpy in the same way as we did previously.

- -
-
-
-
-
-
In [ ]:
-
-
-
import numpy as np
-import pandas as pd
-from pandas import Series, DataFrame
-
- -
-
-
- -
-
-
-
-
-
-

Some matplotlib functionality is provided directly through pandas (such as the plot() method as we will see) but for much of it you need to import the matplotlib interface itself.

-

The most common interface to matplotlib is its pyplot module which provides a way to affect the current state of matplotlib directly. By convention this is imported as plt.

-

We also set the figure format to be SVG so that the plots look a little nicer in our Jupyter notebook.

- -
-
-
-
-
-
In [ ]:
-
-
-
import matplotlib.pyplot as plt
-%config InlineBackend.figure_format = 'svg'
-
- -
-
-
- -
-
-
-
-
-
-

Once we have imported matplotlib we can start calling its functions. Any functions called on the plt object will affect all of matplotlib from that point on in the script.

- -
-
-
-
-
-
-
-
-

We first need to import some data to plot. Let's start with the data from the pandas section (available from cetml1659on.dat) and import it into a DataFrame:

- -
-
-
-
-
-
In [ ]:
-
-
-
df = pd.read_csv(
-    'data/cetml1659on.dat',  # file name
-    skiprows=6,  # skip header
-    sep='\s+',  # whitespace separated
-    na_values=['-99.9', '-99.99'],  # NaNs
-)
-df.head()
-
- -
-
-
- -
-
-
-
-
-
-

Pandas integrates matplotlib directly into itself so any dataframe can be plotted easily simply by calling the plot() method on one of the columns. This creates a plot object which you can then edit and alter, for example by setting the axis labels using the plt.ylabel() function before displaying it with plt.show().

-

Matplotlib operates on a single global state and calling any function on plt will alter that state. Calling df.plot() sets the currently operating plot. plt.ylabel() then alters that state and plt.show() displays it.

- -
-
-
-
-
-
In [ ]:
-
-
-
df['JAN'].plot()
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

    -
  • Make sure you can reproduce the plot above. Try tweaking the labels or which column is plotted.
  • -
  • Try putting in two plot() calls with different months (January and July for example) before calling show().
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Making it prettier

-
-
-
-
-
-
-
-
-

While it's useful to be able to quickly plot any data we have in front of us, matplotlib's power comes from its configurability. Let's experiment with a dataset and see how much we can change the plot.

-

We'll start with a simple DataFrame containing two columns, one with the values of a cosine, the other with the values of a sine.

- -
-
-
-
-
-
In [ ]:
-
-
-
X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
-data = {'cos': np.cos(X), 'sin': np.sin(X)}
-trig = DataFrame(index=X, data=data)
-
-trig.plot()
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

You can see that it has plotted the sine and cosine curves between $-\pi$ and $\pi$. Now, let's go through and see how we can affect the display of this plot.

- -
-
-
-
-
-
-
-
-

Changing colours and line widths

First step, we want to have the cosine in blue and the sine in red and a slightly thicker line for both of them.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

    -
  • Using the temperature dataset, set the colours of the July and January lines to a warm colour and a cool colour.
  • -
  • Add in the yearly average column to the plot with a dashed line style.
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Setting limits

Current limits of the figure are a bit too tight and we want to make some space in order to clearly see all data points.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-### New code
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Setting ticks

Current ticks are not ideal because they do not show the interesting values ($\pm\pi$,$\pm\frac{\pi}{2}$) for sine and cosine. We’ll change them such that they show only these values.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-### New code
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Setting tick labels

Ticks are now properly placed but their label is not very explicit. We could guess that 3.142 is $\pi$ but it would be better to make it explicit. When we set tick values, we can also provide a corresponding label in the second argument list. Note that we’ll use LaTeX to allow for nice rendering of the label.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-### New code
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Moving spines

Spines are the lines connecting the axis tick marks and noting the boundaries of the data area. They can be placed at arbitrary positions and until now, they were on the border of the axis. We’ll change that since we want to have them in the middle. Since there are four of them (top/bottom/left/right), we’ll discard the top and right by setting their color to none and we’ll move the bottom and left ones to coordinate 0 in data space coordinates.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-
-### New code
-ax = plt.gca()  # gca stands for 'get current axis'
-ax.spines['right'].set_color('none')
-ax.spines['top'].set_color('none')
-ax.xaxis.set_ticks_position('bottom')
-ax.spines['bottom'].set_position(('data',0))
-ax.yaxis.set_ticks_position('left')
-ax.spines['left'].set_position(('data',0))
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Adding a legend

Let’s add a legend in the upper left corner. This only requires adding the keyword argument label (that will be used in the legend box) to the plot commands.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-
-ax = plt.gca()  # gca stands for 'get current axis'
-ax.spines['right'].set_color('none')
-ax.spines['top'].set_color('none')
-ax.xaxis.set_ticks_position('bottom')
-ax.spines['bottom'].set_position(('data',0))
-ax.yaxis.set_ticks_position('left')
-ax.spines['left'].set_position(('data',0))
-
-### New code
-plt.legend(loc='upper left')
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Annotate some points

Let’s annotate some interesting points using the annotate command. We chose the $\frac{2}{3}\pi$ value and we want to annotate both the sine and the cosine. We’ll first draw a marker on the curve as well as a straight dotted line. Then, we’ll use the annotate command to display some text with an arrow.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-
-ax = plt.gca()  # gca stands for 'get current axis'
-ax.spines['right'].set_color('none')
-ax.spines['top'].set_color('none')
-ax.xaxis.set_ticks_position('bottom')
-ax.spines['bottom'].set_position(('data',0))
-ax.yaxis.set_ticks_position('left')
-ax.spines['left'].set_position(('data',0))
-
-plt.legend(loc='upper left')
-
-### New code
-t = 2 * np.pi / 3
-plt.plot([t, t], [0, np.cos(t)], color='blue', linewidth=2.5, linestyle="--")
-plt.scatter([t, ], [np.cos(t), ], 50, color='blue')
-
-plt.annotate(r'$cos(\frac{2\pi}{3})=-\frac{1}{2}$',
-             xy=(t, np.cos(t)), xycoords='data',
-             xytext=(-90, -50), textcoords='offset points', fontsize=16,
-             arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
-
-plt.plot([t, t],[0, np.sin(t)], color='red', linewidth=2.5, linestyle="--")
-plt.scatter([t, ],[np.sin(t), ], 50, color='red')
-
-plt.annotate(r'$sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$',
-             xy=(t, np.sin(t)), xycoords='data',
-             xytext=(+10, +30), textcoords='offset points', fontsize=16,
-             arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Now that you know how to make different modifications to your plots, we can make some of these changes to our temperature data.

- -
-
-
-
-
-
-
-
-

Saving plot to a file

You can take any plot you've created within Jupyter and save it to a file on disk using the plt.savefig() function. You give the function the name of the file to create and it will use whatever format is specified by the name.

- -
-
-
-
-
-
In [ ]:
-
-
-
trig.plot()
-
-plt.show()
-
-plt.savefig('my_fig.svg')
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

    -
  • Add in a legend for the data.
  • -
  • Add an annotation to one of the spikes in the data. Make sure the label is placed nicely.
      -
    • Tip: you can get the year and temperature for a spike using:
      warm_winter_year = df['JAN'].idxmax()
      -warm_winter_temp = df['JAN'].max()
      -
      -
    • -
    -
  • -
  • Save the figure to a file and display it in your Jupyter notebook.
  • -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Bar charts

Of course, Matplotlib can plot more than just line graphs. One of the other most common plot types is a bar chart. Let's work towards plotting a bar chart of the average temperature per decade.

-

Let's start by adding a new column to the data frame which represents the decade. We create it by taking the index (which is a list of years), converting each element to a string and then replacing the fourth character with a '0'.

- -
-
-
-
-
-
In [ ]:
-
-
-
years = Series(df.index, index=df.index).apply(str)
-decade = years.apply(lambda x: x[:3]+'0')
-
-df['decade'] = decade
-df.head()
-
- -
-
-
- -
-
-
-
-
-
-

Once we have our decade column, we can use Pandas groupby() function to gather our data by decade and then aggregate it by taking the mean of each decade.

- -
-
-
-
-
-
In [ ]:
-
-
-
by_decade = df.groupby('decade')
-agg = by_decade.aggregate(np.mean)
-
-agg.head()
-
- -
-
-
- -
-
-
-
-
-
-

At this point, agg is a standard Pandas DataFrame so we can plot it like any other, by putting .bar after the plot call:

- -
-
-
-
-
-
In [ ]:
-
-
-
agg.YEAR.plot.bar()
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 4

    -
  1. Plot a bar chart of the average temperature per century.

    -
      -
    • Set the limits of the y-axis to zoom in on the data.
    • -
    -
  2. -
  3. Plot a histogram of the average annual temperature

    -
      -
    • Make sure that the x-axis is labelled correctly.
    • -
    • Tip: Look in the documentation for the right command to run
    • -
    -
  4. -
  5. Plot a scatter plot of each year's February temperature plotted against that year's January temperature. Is there an obvious correlation?

    -
  6. -
- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/16_viewing_molecules.html b/html/16_viewing_molecules.html deleted file mode 100644 index 0407307..0000000 --- a/html/16_viewing_molecules.html +++ /dev/null @@ -1,12038 +0,0 @@ - - - -16_viewing_molecules - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Viewing Molecules

nglview is an extremely powerful and capable 3D molecule viewer that runs within a web browser. It supports complex visualisations of molecules from a range of file formats, and can even be used to view trajectories. It provides a full framework for building 3D molecular visualisation into your Jupyter notebooks or websites.

-

While nglview is very powerful, that power and flexibility can be a little daunting for newcomers. BioSimSpace is a project that provides easy-to-use wrappers around common molecular simulation tasks. One such task is viewing molecules. BioSimSpace provides the function viewMolecules that uses nglview to do exactly that :-)

- -
-
-
-
-
-
In [ ]:
-
-
-
from BioSimSpace import viewMolecules
-v = viewMolecules("data/dioxin.pdb")
-
- -
-
-
- -
-
-
-
-
-
-

The above code has used the molecule file parsers built into BioSimSpace to load the molecule contained in dioxin.pdb. This is then rendered using nglview. The above nglview interface allows you to rotate the molecule (left click and drag), zoom in and out (pinch or scroll up or down) and translate (right click and drag, or control+click on a Mac).

-

Try moving and rotating the molecule. If you lose the molecule, click the "Center" button in the General tab to recenter the molecule.

-

Simple molecule view

- -
-
-
-
-
-
-
-
-

The BioSimSpace viewMolecules function has done two things:

-
    -
  • it first loaded the molecule(s) from the file,
  • -
  • and it then rendered them
  • -
-

Loading molecules can take a long time and use a lot of memory. To prevent you from having to repeatedly load molecules, the viewMolecules function has returned a view object that can be re-used. To see how to use it, use python's help...

- -
-
-
-
-
-
In [ ]:
-
-
-
help(v)
-
- -
-
-
- -
-
-
-
-
-
-

As you can see, we can use v.system() to view all of the loaded molecules again, without having to reload them.

- -
-
-
-
-
-
In [ ]:
-
-
-
v.system()
-
- -
-
-
- -
-
-
-
-
-
-

You can change the representation of the molecule by clicking on the "Representation" tab. First click the "Remove" icon to remove the current representation. Then click the drop-down representation box to choose another representation (e.g. "spacefill"). Then click the "Add" icon to add that representation. Experiment with adding and removing different representations.

-

Different representations

- -
-
-
-
-
-
-
-
-

Loading lots of molecules

nglview isn't just limited to viewing small molecules. It also works really well as a viewer for large molecular systems. It (sometimes) is sufficiently clever to select appropriate representations for the molecules being loaded.

-

For example, view the protein-ligand complex in data/complex.pdb

- -
-
-
-
-
-
In [ ]:
-
-
-
v = viewMolecules("data/complex.pdb")
-
- -
-
-
- -
-
-
-
-
-
-

In this case, nglview has automatically selected a cartoon representation for the protein and a ball and stick representation for the ligand.

-

You can achieve this yourself by using selections to set different representation for different molecules (or parts of molecules). First, delete the default representations by repeatedly clicking the "Remove" button in the representations tab. Once you have removed all of them, we will add a new representation. Select the type as surface, and then type "protein" into the selection box (which starts off with a "*" in it).

-

Select protein

-

Click "Add". After some time thinking, nglview will show you a surface representation of the protein.

-

Next, add a "spacefill" representation to the ligand. The ligand residue is called "LIG", so to do this, select "spacefill", type "LIG" into the selection box, and then click add. You should now see the ligand neatly bound into the protein.

-

Select ligand

-

The selection box can be used to select proteins ("protein"), water ("water"), everything ("*") or residues by name (e.g. "LIG") or number (e.g. "35"). Play around creating different selections and representations. For example, create a "point" representation for water, a "tube" representation of the protein and a "licorice" representation of all alanine residues. Note - you can control the opacity (level of transparency) of a representation by selecting the representation in the drop down box and changing the "opacity" slider in the "Parameters" tab - see below. You can also change things like the colour scheme of the representation in this "Parameters" tab

-

Opacity

- -
-
-
-
-
-
-
-
-

Viewing individual molecules

The view object returned by BioSimSpace can be used to view specific molecules from the file. To do this, use the molecules function. This takes a list of indices of the molecules you want to view. For example, to view the first molecule (molecule at index 0) type:

- -
-
-
-
-
-
In [ ]:
-
-
-
v.molecules([0])
-
- -
-
-
- -
-
-
-
-
-
-

while to view molecules 100-1000 use the below code (noting that you may need to add a "ball and stick" representation in case nglview automatically hides the water molecules).

- -
-
-
-
-
-
In [ ]:
-
-
-
v.molecules( range(100,1000) )
-
- -
-
-
- -
-
-
-
-
-
-

Loading more complex files

BioSimSpace provides readers and writers for a variety of molecular file formats. Some of these split the molecular data over multiple files, e.g. a topology and coordinate file. To view these, pass all of the necessary files to viewMolecules in a list, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
v = viewMolecules(["data/ala.top","data/ala.crd"])
-
- -
-
-
- -
-
-
-
-
-
-

This can be combined with molecule selection. For example, to load and view only molecules 0-4 in the file, pass the indices of the molecules you want to view as a second argument to viewMolecules, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
v = viewMolecules(["data/ala.top","data/ala.crd"], [0,1,2,3,4])
-
- -
-
-
- -
-
-
-
-
-
-

(in reality, all molecules are loaded, but only molecules specified by the indices are viewed. You can still use v.system() to view all molecules)

- -
-
-
-
-
-
In [ ]:
-
-
-
v.system()
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/17_regular_expressions.html b/html/17_regular_expressions.html deleted file mode 100644 index 474aa0e..0000000 --- a/html/17_regular_expressions.html +++ /dev/null @@ -1,12456 +0,0 @@ - - - -17_regular_expressions - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Regular Expressions

In the error handling session we tried to interpret strings as valid heights and weights. This involved looking for text such as "meter" or "kilogram" in the string, and then extracting the number. This process is called pattern matching, and is best undertaken using a regular expression.

-

Regular expressions have a long history and are available in most programming languages. Python implements a standards-compliant regular expression module, which is called re.

- -
-
-
-
-
-
In [ ]:
-
-
-
import re
-
- -
-
-
- -
-
-
-
-
-
-

Let's create a string that contains a height and see if we can use a regular expression to match that...

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "2 meters"
-
- -
-
-
- -
-
-
-
-
-
-

To search for the string "meters" in a string, use re.search, e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
if re.search("meters", h):
-    print("String contains 'meters'")
-else:
-    print("No match")
-
- -
-
-
- -
-
-
-
-
-
-

re.search returns a match object if there is a match, or None if there isn't.

- -
-
-
-
-
-
In [ ]:
-
-
-
m = re.search("meters", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

This matches "meters", but what about "meter"? "meter" is "meters" without an "s". You can specify that a letter is matched 0 or 1 times using "?"

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "2 meter"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("meters?", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

However, this has still not worked, as we match "meters" in the middle of the string. We need to match "meters" only at the end of the string. We do this using "$", which means match at end of string

- -
-
-
-
-
-
In [ ]:
-
-
-
m = re.search("meters?$", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

We also want to be able to match "m" as well as "meters". To do this, we need to use the "or" operator, which is "|". It is a good idea to put this in round brackets to make both sides of the "or" statement clear.

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "2 m"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("(m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

Next, we want to match the number, e.g. "X meters", where "X" is a number. You can use "\d" to represent any single digit. For example

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "2 meters"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("\d (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

A problem with the above example is that it only matches a number with a single digit, as "\d" only matches a single digit. To match one or more digits, we need to put a "+" afterwards, as this means "match one or more", e.g.

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "10 meters"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("\d+ (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

This match breaks if the number has a decimal point, as it doesn't match the "\d". To match a decimal point, you need to use "\.", and also "?", which means "match 0 or 1 decimal points", and then "\d*", which means "match 0 or more digits"

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "1.5 meters"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("\d+\.?\d* (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

The number must match at the beginning of the string. We use "^" to mean match at start...

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "some 1.8 meters"
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("^\d+\.?\d* (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

Finally, we want this match to be case insensitive, and would like the user to be free to use as many spaces as they want between the number and the unit, before the string or after the string... To do this we use "\s*" to represent any number of spaces, and match using re.IGNORECASE.

- -
-
-
-
-
-
In [ ]:
-
-
-
h = "   1.8 METers   "
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m = re.search("^\s*\d+\.?\d*\s*(m|meters?)\s*$", h, re.IGNORECASE)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

The round brackets do more than just group parts of your search. They also allow you to extract the parts that match.

- -
-
-
-
-
-
In [ ]:
-
-
-
m.groups()
-
- -
-
-
- -
-
-
-
-
-
-

You can place round brackets around the parts of the match you want to capture. In this case, we want to get the number...

- -
-
-
-
-
-
In [ ]:
-
-
-
m = re.search("^\s*(\d+\.?\d*)\s*(m|meters?)\s*$", h, re.IGNORECASE)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
m.groups()
-
- -
-
-
- -
-
-
-
-
-
-

As m.groups()[0] contains the match of the first set of round brackets (which is the number), then we can get the number using m.groups()[0]. This enables us to rewrite the string_to_height function from the last section as;

- -
-
-
-
-
-
In [ ]:
-
-
-
def string_to_height(height):
-    """Parse the passed string as a height. Valid formats are 'X m', 'X meters' etc.""" 
-    m = re.search("^\s*(\d+\.?\d*)\s*(m|meters?)\s*$", height, re.IGNORECASE)
-    
-    if m:
-        return float(m.groups()[0])
-    else:
-        raise TypeError("Cannot extract a valid height from '%s'" % height)
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
h = string_to_height("   1.5    meters   ")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
h
-
- -
-
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Rewrite your string_to_weight function using regular expressions. Check that it responds correctly to a range of valid and invalid weights.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

Update string_to_height so that it can also understand heights in both meters and centimeters (returning the height in meters), and update string_to_weight so that it can also understand weights in both grams and kilograms (returning the weight in kilograms). Note that you may find it easier to separate the number from the units. You can do this using the below function to divide the string into the number and units. This uses "\w" to match any word character.

- -
-
-
-
-
-
In [ ]:
-
-
-
def get_number_and_unit(s):
-    """Interpret the passed string 's' as "X units", where "X" is a number and
-       "unit" is the unit. Returns the number and (lowercased) unit
-    """
-    m = re.search("^\s*(\d+\.?\d*)\s*(\w+)\s*$", s, re.IGNORECASE)
-
-    if m:
-        number = float(m.groups()[0])
-        unit = m.groups()[1].lower()
-        return (number, unit)
-    else:
-        raise TypeError("Cannot extract a valid 'number unit' from '%s'" % s)       
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/01_jupyter_howto.html b/html/answers/01_jupyter_howto.html deleted file mode 100644 index a84dd50..0000000 --- a/html/answers/01_jupyter_howto.html +++ /dev/null @@ -1,13313 +0,0 @@ - - - -01_jupyter_howto - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

What is Jupyter and how do you use it?

Project Jupyter exists to develop software and standards for interactive computing. Jupyter, which you are using now, provides an interactive notebook which runs on a cloud-server, and with which you interact using a web browser.

-

As installed, Jupyter provides two things:

-
    -
  • An interactive Python notebook, which you are using now (this is an interactive notebook!)
  • -
  • An interactive bash terminal (we will use this later)
  • -
-

What is an interactive Python notebook? It is a notebook that mixes documentation (like this!) with Python code (like below), with the output of that code.

-

For example, below is the Python code to print "Hello World". Click on the below code and then press "SHIFT+Return". You should see that the code is run, and below "Hello World" will be printed.

- -
-
-
-
-
-
In [1]:
-
-
-
print("Hello World")
-
- -
-
-
- -
-
- - -
-
- -
-
Hello World
-
-
-
- -
-
- -
-
-
-
-
-
-

The above code is interactive because it runs when you pressed "SHIFT+Return". It is also interactive because you can change it. For example, click again on the code and change "Hello World" to "Hello Jupyter". Press "SHIFT+Return" again and you should see "Hello Jupyter" printed.

-

A code cell can have as much or little Python in it as you want. The below cell defines a function and then runs it in a loop. What do you think will be printed out when you select it and press "SHIFT+Return"? Have a go. Are you right?

- -
-
-
-
-
-
In [2]:
-
-
-
def countDown(start):
-    for i in range(start, 0, -1):
-        print("%d..." % i)
-    print("Lift off!")
-    
-countDown(10)
-
- -
-
-
- -
-
- - -
-
- -
-
10...
-9...
-8...
-7...
-6...
-5...
-4...
-3...
-2...
-1...
-Lift off!
-
-
-
- -
-
- -
-
-
-
-
-
-

Each cell can use the variables and functions defined in previous cells. For example, the next cell sets the value of the variable x to 5, while the following cell then uses that to call the countDown function.

- -
-
-
-
-
-
In [11]:
-
-
-
x = 21
-
- -
-
-
- -
-
-
-
In [12]:
-
-
-
countDown(x)
-
- -
-
-
- -
-
- - -
-
- -
-
21...
-20...
-19...
-18...
-17...
-16...
-15...
-14...
-13...
-12...
-11...
-10...
-9...
-8...
-7...
-6...
-5...
-4...
-3...
-2...
-1...
-Lift off!
-
-
-
- -
-
- -
-
-
-
-
-
-

You can go back up and edit the value of x. For example, set x to 7 and press SHIFT+Return. This has changed x. However, it doesn't re-run any code that depends on x. That is for you to do. You need to click on the next cell countDown(x) and press SHIFT+Return again. You should then see that the code is run and updated.

-

Because you can change any cell at any time, a notebook can get into a confused state. If we change x again to 21, then the cell countDown(x), is now out of date, as it depends on the old value of x. So, how can we know what state each cell is in? The state is based on the order in which cells are executed. This is shown by the little number next to each cell, e.g. see below.

-

Image showing Jupyter state

-

The number "10" shows that the line x = 21 was the 10th cell executed by Python, while the line countDown(x) was the 9th cell executed. As countDown(x) was executed before x = 21, it was not affected by x = 21. If you want it to be affected, you need to execute that cell again (so it is now the 11th cell executed), e.g.

-

Image showing updated Jupyter state

-

If you find yourself getting confused, then click "Kernel | Restart & Clear Output" from the menu above to clear all outputs, clear the state, and reset back to a clean notebook.

-

Image showing clear and restart

- -
-
-
-
-
-
-
-
-

Interactive Graphics

In addition to mixing documentation with interactive Python, the notebook also allows Python modules to embed interactive graphics. For example, the BioSimSpace Project provides an easy wrapper around the Python molecular viewer nglview. This can be used to view molecules from within a notebook.

-

Execute the cell below by selecting it and pressing SHIFT+Return. This will import the viewMolecules function from BioSimSpace, which will load the molecules contained in the file data/complex.pdb and will display it below.

- -
-
-
-
-
-
In [13]:
-
-
-
from BioSimSpace import viewMolecules
-v = viewMolecules("data/complex.pdb")
-
- -
-
-
- -
-
- - -
-
- -
-
Reading molecules from '['data/complex.pdb']'
-Rendering the molecules...
-
-
-
- -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

You can rotate and interact with molecules once they have loaded (the "[*]" next to a cell indicates that it is processing. It will turn into a number once processing is complete)

-

You can change molecular representations and interact with the 3D molecule view. Later on we will go through some examples showing what you can do and how you can select parts of the molecule.

-

In addition to viewing molecules, you can also draw graphs. This is achieved using a combination of the matplotlib, numpy and pandas modules. For example, the below code draws a sine and cosine curve.

- -
-
-
-
-
-
In [14]:
-
-
-
# First, import pandas, numpy and matplotlib
-import pandas as pd
-from pandas import Series, DataFrame
-import numpy as np
-import matplotlib.pyplot as plt
-%config InlineBackend.figure_format = 'svg'   # helps make things look better in Jupyter :-)
-
-# now define a sine and cosine curve
-X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
-data = {'cos': np.cos(X), 'sin': np.sin(X)}
-trig = DataFrame(index=X, data=data)
-
-trig.plot()
-
- -
-
-
- -
-
- - -
-
Out[14]:
- - - -
-
<matplotlib.axes._subplots.AxesSubplot at 0x7f07313ecac8>
-
- -
- -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Later on we will go through pandas, matplotlib and numpy and you will learn how to draw lots of different and interesting graphs.

- -
-
-
-
-
-
-
-
-

Mixing documentation and code

In addition to interactivity, the real benefit of a Jupyter notebook is that it encourages you to include lots of documentation with your code. In effect, a notebook is a combination of code, documentation, and (once it has been run) analysis and data.

-

You can create documentation by changing the type of a cell. The type of a cell is indicated in the drop-down at the top, under the menu. If you click on the graph above, you will see that the type of a code cell is "Code".

-

Image showing code type

-

If you now click on this documentation, you should see the cell type change to "Markdown"

-

Image showing markdown type

- -
-
-
-
-
-
-
-
-

Creating your own cells

You can create your own cells in two different ways;

-
    -
  • either by pressing SHIFT+Return on the last cell (which creates a new cell at the bottom of the notebook),
  • -
  • or by clicking "Insert | Cell Above" or "Insert | Cell Below" from the menu to insert a new cell above or below the currently selected cell.
  • -
-

You can choose whether this should be a code or documentation (markdown) cell by selecting the appropriate value from the drop-down at the top.

-

Code cells can contain any valid Python. Documentation (markdown) cells can contain any text. The text can be formatted, using the markdown standard.

-

Have a go by creating some new documentation and code cells below. If you want some inspiration for documentation, take a look at the documentation cells above by double-clicking on them. You should see how I have inserted images, lists, headings etc.

-

You can then render the documentation using SHIFT+Return (just like running a code cell)

- -
-
-
-
-
-
In [15]:
-
-
-
a = 10
-b = 32
-
- -
-
-
- -
-
-
-
In [16]:
-
-
-
print("The answer to life, the universe and everything is %d" % (a+b))
-
- -
-
-
- -
-
- - -
-
- -
-
The answer to life, the universe and everything is 42
-
-
-
- -
-
- -
-
-
-
-
-
-

Controlling the Python kernel

The Python process that is interpreting and executing your Python code is referred to as the "kernel" of the notebook (hence the "Kernel" menu above). When you execute a code cell it sends the code to the Python kernel, and then waits for the kernel to send a result back. The Python kernel can only execute one thing at a time. This means that, for slow functions, you could be executing cells too quickly and leave the Python kernel behind.

-

For example, here is a function that fakes a slow function as it goes to sleep for the specified number of seconds.

- -
-
-
-
-
-
In [17]:
-
-
-
def sleep(n):
-    import os
-    os.system("sleep %d" % n)
-
- -
-
-
- -
-
-
-
-
-
-

Below we will have three code cells that each sleep for a long time. Execute all three quickly one after another...

- -
-
-
-
-
-
In [18]:
-
-
-
sleep(30)
-
- -
-
-
- -
-
-
-
In [19]:
-
-
-
sleep(30)
-
- -
-
-
- -
-
-
-
In [20]:
-
-
-
sleep(30)
-
- -
-
-
- -
-
-
-
-
-
-

You should see that there is a [*] next to each code cell. This is because they are all waiting for the Python kernel to finish executing and return.

-

Jupyter is sleeping

-

If you wait you will see each sleep finishing and control then passing to the next.

-

Sometimes you don't want to be patient, and you want to stop the Python kernel from running a function. To stop (interrupt) the kernel, either click the "Stop" icon at the top of the screen, or select "Kernel | Interrupt" from the menu.

-

Jupyter stop button

-

Other useful buttons up there are "restart the kernel" (same as the menu Kernel | Restart & Clear Output) and the arrows that can move the selected cell up and down the notebook.

-

Have a play. Explore the buttons and menu items. Take a look at the "Help" in the menu. Remember that, if everything goes wrong, you can always click "Stop" or select "Kernel | Restart & Clear Output" from the menu.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

Running external programs in a code cell

You can do more than just execute Python in the code cells. You can run any command you want! Just put an exclamation mark first, and then type the command normally as you would in a terminal. For example, the below cell is running ls

- -
-
-
-
-
-
In [21]:
-
-
-
! ls
-
- -
-
-
- -
-
- - -
-
- -
-
01_jupyter_howto.ipynb	      09_exceptions.ipynb	    data
-02_lists.ipynb		      10_error_handling.ipynb	    images
-03_dictionaries.ipynb	      11_modules.ipynb		    morse.py
-04_functions.ipynb	      12_pandas.ipynb		    __pycache__
-05_objects.ipynb	      13_basic_numpy.ipynb	    superhero2.py
-06_classes.ipynb	      14_more_numpy.ipynb	    superhero.py
-07_documentation.ipynb	      15_matplotlib.ipynb
-08_class_documentation.ipynb  17_regular_expressions.ipynb
-
-
-
- -
-
- -
-
-
-
-
-
-

You can run any command that you want, exactly as you would typing in a terminal. For example, let's use cat to read the LICENSE file...

- -
-
-
-
-
-
In [23]:
-
-
-
! cat LICENSE
-
- -
-
-
- -
-
- - -
-
- -
-
MIT License
-
-Copyright (c) 2018 Matt Williams, Lester Hedges, Christopher Woods
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-
-
- -
-
- -
-
-
-
-
-
-

You can work with directories as well. For example, here we will copy the dioxin.pdb file from data into the current directory and will then view it using viewMolecules...

- -
-
-
-
-
-
In [24]:
-
-
-
! cp data/dioxin.pdb ./
-
- -
-
-
- -
-
-
-
In [25]:
-
-
-
from BioSimSpace import viewMolecules  # importing again in case we restarted the kernel
-viewMolecules("dioxin.pdb")
-
- -
-
-
- -
-
- - -
-
- -
-
Reading molecules from '['dioxin.pdb']'
-Rendering the molecules...
-
-
-
- -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
Out[25]:
- - - -
-
<BioSimSpace.Notebook.view.View at 0x7f07313dfd68>
-
- -
- -
-
- -
-
-
-
-
-
-

Saving the notebook

At the top of the screen is the name of the notebook, and its current save state.

-

Jupyter file info

-

The current save state tells you when the last checkpoint was saved, and whether or not the notebook contains any unsaved changes. Checkpoints provide points in the notebook that you can refer back to, using the menu item "File | Revert to Checkpoint". You are always able to revert to the last checkpoint, meaning that if you make a mistake, you can go back to a previous safe version.

-

If you click "File | Save and Checkpoint", or click the "Save" icon (floppy disk icon on the far left), then the current state of the notebook will be saved. This includes not just the code and documentation you have written, but also all of the outputs and graphs that you have created. This is really useful, as it lets you save your work, and then come back later.

-

For example, why don't you save now, and then close this window and then open the notebook again? You should see that everything you have done, and everything that was output is restored. Did it work?

-

The notebook was saved to a file called 01_jupyter_howto.ipynb. The .ipynb signifies that the file is a Jupyter notebook (formerly called an interactive python notebook). You can change the name by clicking on the name of the notebook above, and then choosing a different name in the window that opens.

-

Alternatively, you can save a copy of this notebook under a different name by clicking "File | Make a Copy..." in the menu. You can then copy this file or send it to someone else. As a notebook contains the code, data, outputs and analysis, sharing notebooks is the closest thing we have to interactive papers. This is a really powerful concept and, I believe, will fundamentally change how we report and share the results of computational and data analysis in science.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
-
-
-
-

The Jupyter Hub

When you first logged into this website you launched this notebook by clicking on its name in the Jupyter Hub. This was the page that looked like a file browser and that (hopefully) is still open in one of the tabs in your browser. The Jupyter Hub is not just a file browser. It is the gateway to do a lot more than just interact with a single notebook.

-

The first thing to note is that this Jupyter Hub is running on the cloud and consumes resources. It is important that you click "Logout" (top right) when you have finished, as this will free up the hub to be used by someone else. There is a timer that will automatically log you out 40 minutes after your web browser is closed. However, please remember to log out if you can.

-

Jupyter hub

-

The main view of the hub is the file browser that shows all of your files. You can navigate through the files and directories like any normal file browser. For example, you can see above the 01_jupyter_howto.ipynb file that contains this notebook. You should see that the file has a green icon next to it, as well as a Running indicator on the right. This indicates that the notebook is currently running.

-

Running notebooks consume cloud resources, which are limited. You can only have 1.8 GB of data in memory and have limited CPU. You should thus shutdown any running notebooks that you aren't using any more. To do this, click the checkbox to the left of the notebook filename. This will bring up some buttons at the top of the screen. Click the "Shutdown" button to shut down the notebook.

-

Shutdown running kernels

-

Alternatively, you can go to the "Running" tab to see everything of yours that is currently running in the hub. Click the "Shutdown" button on the right for anything that you aren't using and want to shut down.

-

Running tab

-

Back in the "Files" tab, clicking the checkbox next to a file brings up buttons that let you download files. You can use the hub to download files by selecting them and clicking the "Download" button that will appear. Other buttons will appear depending on the type of file, e.g. "Duplicate", "View", "Edit" etc. The bin icon will delete the file.

-

Download a file

-

You can create new notebooks or files by clicking on the "New" button on the right. This will open a drop-down menu for you to select the type of thing to create.

-

New dropdown

-

As well as letting you create a new jupyter notebook (by clicking "New | Python3"), you can also create new text files ("New | Text File") or new folders ("New | Folder"). Most interestingly, you can also create a new bash terminal. If you click "New | Terminal" it will open a new tab in which you will have a fully functional bash terminal shell. This is running on the same cloud server as your notebook. It is 100% completely a fully functional terminal within your web browser. You could even use it to run the normal command-line python ;-)

-

Bash in the browser

- -
-
-
-
-
-
-
-
-

The cloud server on which this is running comes with lots of useful software that you can run from within the bash shell. For example you have;

-
    -
  • update_workshops : This command will update all of the software and workshop material to the latest version. Useful if you delete anything by accident or the tutor needs to make a last-minute fix
  • -
  • git : Git is installed. The workshop material is downloaded and updated from git repositories, e.g. https://github.com/ccpbiosim/python_and_data. You could also use git to push things back to the cloud, e.g. if you want to download lots of files.
  • -
  • anaconda python3 : A full anaconda python3 is installed, with pandas, matplotlib, numpy etc. etc.
  • -
  • wget : This tool lets you quickly download (or should that be sideload?) files onto the cloud server
  • -
  • top : Process monitor. You can use this to see which processes are consuming lots of resources. Any process that uses more than 1.8 GB of memory will automatically be killed by the cloud server. You can kill them yourself using the kill command.
  • -
  • tar and bzip2 : useful for packing/unpacking and compressing/uncompressing files
  • -
  • AmberTools : The full AmberTools suite is installed in directory $AMBERHOME
  • -
  • ProtoMS : The full ProtoMS package is installed in directory $PROTOMSHOME
  • -
  • wham and wham-2d : Tools used for weighted histogram analysis
  • -
  • Sire and BioSimSpace : The Sire and BioSimSpace python simulation framework and workflow node packages.
  • -
-

The cloud server was built with docker. If you want to download it yourself to run on your own computer at home after the workshop then please install docker and type

- -
docker run -it --rm -p 8888:8888 chryswoods/bss-workshop:latest
-

This will download and run the image from dockerhub and will make it available at the web address that will be printed to the screen (e.g. it will look something like http://localhost:8888/?token=641396480e6421eae8b18261d82a75f958fe166e1c8b20a8). Simply open that address in your browser :-). You can see and download the DockerFile used to generate this image from here.

-

Thanks to the Microsoft Azure Kubernetes Service on which this Jupyter image is running.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/02_lists.html b/html/answers/02_lists.html deleted file mode 100644 index 748a296..0000000 --- a/html/answers/02_lists.html +++ /dev/null @@ -1,12686 +0,0 @@ - - - -02_lists - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Lists

Lists provide a simple way to hold a collection of values. You create a list using square brackets. For example, here we can create a list that contains the values "cat", "dog" and "horse"

-
a =[ "cat", "dog", "horse" ]
-
- -
-
-
-
-
-
In [1]:
-
-
-
a = ["cat", "dog", "horse"]
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
print(a)
-
- -
-
-
- -
-
- - -
-
- -
-
['cat', 'dog', 'horse']
-
-
-
- -
-
- -
-
-
-
-
-
-

You can access the items in the list by placing the index of the item in square brackets. The first item is at index 0

- -
-
-
-
-
-
In [3]:
-
-
-
a[0]
-
- -
-
-
- -
-
- - -
-
Out[3]:
- - - -
-
'cat'
-
- -
- -
-
- -
-
-
-
-
-
-

The second item is at index 1, and the third item at index 2.

- -
-
-
-
-
-
In [4]:
-
-
-
a[2]
-
- -
-
-
- -
-
- - -
-
Out[4]:
- - - -
-
'horse'
-
- -
- -
-
- -
-
-
-
-
-
-

What do you think will happen if we access the item at index 3?

- -
-
-
-
-
-
In [5]:
-
-
-
a[3]
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-IndexError                                Traceback (most recent call last)
-<ipython-input-5-94e7916e7615> in <module>()
-----> 1 a[3]
-
-IndexError: list index out of range
-
-
- -
-
- -
-
-
-
-
-
-

You can also access the items in the list from the back. What do you think is at index -1?

- -
-
-
-
-
-
In [6]:
-
-
-
a[-1]
-
- -
-
-
- -
-
- - -
-
Out[6]:
- - - -
-
'horse'
-
- -
- -
-
- -
-
-
-
-
-
-

What about index -2, -3 or -4?

- -
-
-
-
-
-
In [7]:
-
-
-
a[-3]
-
- -
-
-
- -
-
- - -
-
Out[7]:
- - - -
-
'cat'
-
- -
- -
-
- -
-
-
-
-
-
-

You can add items onto a list by using the .append function. You can find this using tab completion and Python help...

- -
-
-
-
-
-
In [8]:
-
-
-
help(a.append)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on built-in function append:
-
-append(...) method of builtins.list instance
-    L.append(object) -> None -- append object to end
-
-
-
-
- -
-
- -
-
-
-
In [9]:
-
-
-
a.append("fish")
-
- -
-
-
- -
-
-
-
In [10]:
-
-
-
a
-
- -
-
-
- -
-
- - -
-
Out[10]:
- - - -
-
['cat', 'dog', 'horse', 'fish']
-
- -
- -
-
- -
-
-
-
In [11]:
-
-
-
a[3]
-
- -
-
-
- -
-
- - -
-
Out[11]:
- - - -
-
'fish'
-
- -
- -
-
- -
-
-
-
-
-
-

You can put whatever you want into a list, including other lists!

- -
-
-
-
-
-
In [12]:
-
-
-
b = [ 42, 15, a, [7,8,9] ]
-
- -
-
-
- -
-
-
-
In [13]:
-
-
-
b[3]
-
- -
-
-
- -
-
- - -
-
Out[13]:
- - - -
-
[7, 8, 9]
-
- -
- -
-
- -
-
-
-
-
-
-

Putting lists inside lists allows for multidimensional lookup, e.g. can you work out why b[3][2] equals 9?

- -
-
-
-
-
-
In [14]:
-
-
-
b[3][2]
-
- -
-
-
- -
-
- - -
-
Out[14]:
- - - -
-
9
-
- -
- -
-
- -
-
-
-
-
-
-

You can loop over the items in a list using a for loop, e.g.

- -
-
-
-
-
-
In [15]:
-
-
-
for x in a:
-    print(x)
-
- -
-
-
- -
-
- - -
-
- -
-
cat
-dog
-horse
-fish
-
-
-
- -
-
- -
-
-
-
-
-
-

You can get the number of items in the list using the len function.

- -
-
-
-
-
-
In [16]:
-
-
-
len(a)
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
4
-
- -
- -
-
- -
-
-
-
-
-
-

You can use this as an alternative way of looping over the elements of a list

- -
-
-
-
-
-
In [17]:
-
-
-
for i in range(0,len(a)):
-    print(a[i])
-
- -
-
-
- -
-
- - -
-
- -
-
cat
-dog
-horse
-fish
-
-
-
- -
-
- -
-
-
-
-
-
-

A string behaves like a list of letters. For example, if we have the string s = "Hello World", then s[0] is "H" and s[-1] is "d".

- -
-
-
-
-
-
In [18]:
-
-
-
s = "Hello World"
-
- -
-
-
- -
-
-
-
In [19]:
-
-
-
s[-1]
-
- -
-
-
- -
-
- - -
-
Out[19]:
- - - -
-
'd'
-
- -
- -
-
- -
-
-
-
-
-
-

You can loop over every letter in a string in the same way that you can loop over every item in a list.

- -
-
-
-
-
-
In [20]:
-
-
-
for letter in s:
-    print(letter)
-
- -
-
-
- -
-
- - -
-
- -
-
H
-e
-l
-l
-o
- 
-W
-o
-r
-l
-d
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercises

Exercise 1

Create two Python lists called a and b. Put into these lists the values [2, 4, 6, 8], and [10, 20, 30, 40]. -Check that a[2] equals 6 and b[-1] equals 40. (note that you will need to use the menu "Insert | Insert Cell Below" to insert more cells below to create space for your code)

- -
-
-
-
-
-
In [21]:
-
-
-
a = [2, 4, 6, 8]
-
- -
-
-
- -
-
-
-
In [22]:
-
-
-
b = [10, 20, 30, 40]
-
- -
-
-
- -
-
-
-
In [23]:
-
-
-
a[2] == 6 and b[-1] == 40
-
- -
-
-
- -
-
- - -
-
Out[23]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

Now create a loop that loops over each item in a and b and that calculates and prints out the product a[i] * b[i].

- -
-
-
-
-
-
In [24]:
-
-
-
for i in range(0,len(a)):
-    print(a[i] * b[i])
-
- -
-
-
- -
-
- - -
-
- -
-
20
-80
-180
-320
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 3

Modify your code to create a list called c. Use the .append function to set c[i] = a[i] * b[i]. Check your code by making sure that c[-1] equals 320.

- -
-
-
-
-
-
In [25]:
-
-
-
c = []
-for i in range(0,len(a)):
-    c.append(a[i] * b[i])
-print(c)
-
- -
-
-
- -
-
- - -
-
- -
-
[20, 80, 180, 320]
-
-
-
- -
-
- -
-
-
-
In [26]:
-
-
-
c[-1] == 320
-
- -
-
-
- -
-
- - -
-
Out[26]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/03_dictionaries.html b/html/answers/03_dictionaries.html deleted file mode 100644 index c261f14..0000000 --- a/html/answers/03_dictionaries.html +++ /dev/null @@ -1,12395 +0,0 @@ - - - -03_dictionaries - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Dictionaries

Dictionaries are another type of Python container. Instead of storing values by index, they store them associated with a key.

-

You create dictionaries using curly brackets, assigning values to their keys using a colon, e.g.

-
a = { "cat" : "mieow", "dog" : "woof", "horse" : "neigh" }
-
- -
-
-
-
-
-
In [1]:
-
-
-
a = { "cat" : "mieow", "dog" : "woof", "horse" : "neigh"}
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
a
-
- -
-
-
- -
-
- - -
-
Out[2]:
- - - -
-
{'cat': 'mieow', 'dog': 'woof', 'horse': 'neigh'}
-
- -
- -
-
- -
-
-
-
-
-
-

You can look up values in the dictionary by placing the key in square brackets. For example, we can look up the value associated with the key "cat" using a["cat"].

- -
-
-
-
-
-
In [3]:
-
-
-
a["cat"]
-
- -
-
-
- -
-
- - -
-
Out[3]:
- - - -
-
'mieow'
-
- -
- -
-
- -
-
-
-
-
-
-

What happens if the key does not exist?

- -
-
-
-
-
-
In [4]:
-
-
-
a['fish']
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-KeyError                                  Traceback (most recent call last)
-<ipython-input-4-104bafb971c1> in <module>()
-----> 1 a['fish']
-
-KeyError: 'fish'
-
-
- -
-
- -
-
-
-
-
-
-

You insert items into the dictionary by assigning values to keys, e.g.

- -
-
-
-
-
-
In [5]:
-
-
-
a["fish"] = "bubble"
-
- -
-
-
- -
-
-
-
In [6]:
-
-
-
a
-
- -
-
-
- -
-
- - -
-
Out[6]:
- - - -
-
{'cat': 'mieow', 'dog': 'woof', 'fish': 'bubble', 'horse': 'neigh'}
-
- -
- -
-
- -
-
-
-
-
-
-

You can list all of the keys or values of a dictionary using the keys or values functions (which you can find using tab completion and Python help)

- -
-
-
-
-
-
In [7]:
-
-
-
help(a.values)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on built-in function values:
-
-values(...) method of builtins.dict instance
-    D.values() -> an object providing a view on D's values
-
-
-
-
- -
-
- -
-
-
-
In [8]:
-
-
-
a.keys()
-
- -
-
-
- -
-
- - -
-
Out[8]:
- - - -
-
dict_keys(['fish', 'dog', 'horse', 'cat'])
-
- -
- -
-
- -
-
-
-
In [9]:
-
-
-
a.values()
-
- -
-
-
- -
-
- - -
-
Out[9]:
- - - -
-
dict_values(['bubble', 'woof', 'neigh', 'mieow'])
-
- -
- -
-
- -
-
-
-
-
-
-

You can loop over the dictionary by looping over the keys and looking up the values in a for loop, e.g.

- -
-
-
-
-
-
In [10]:
-
-
-
for key in a.keys():
-    print("A %s goes %s" % (key, a[key]))
-
- -
-
-
- -
-
- - -
-
- -
-
A fish goes bubble
-A dog goes woof
-A horse goes neigh
-A cat goes mieow
-
-
-
- -
-
- -
-
-
-
-
-
-

You can put anything as a value into a dictionary, including other dictionaries and even lists. The keys should be either numbers or strings.

- -
-
-
-
-
-
In [11]:
-
-
-
b = { "a" : ["aardvark", "anteater", "antelope"], "b" : ["badger", "beetle"], 26.5: a}
-
- -
-
-
- -
-
-
-
-
-
-

What do you think is at b["a"][-1]? What about b[26.5]["fish"]?

- -
-
-
-
-
-
In [12]:
-
-
-
b[26.5]["fish"]
-
- -
-
-
- -
-
- - -
-
Out[12]:
- - - -
-
'bubble'
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise

Below you have a dictionary that contains the full mapping of every letter to its Morse-code equivalent.

- -
-
-
-
-
-
In [13]:
-
-
-
letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

Use the morse code dictionary to look up the morse code for the letters "s" and "o". What is the morse code for "SOS" (the international emergency distress signal)?

- -
-
-
-
-
-
In [14]:
-
-
-
print(letter_to_morse["s"], letter_to_morse["o"], letter_to_morse["s"])
-
- -
-
-
- -
-
- - -
-
- -
-
... --- ...
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 2

Here is a string that contains a message that must be converted to morse code. Write a loop that converts each letter into the morse code equivalent, and stores it into a list. Print the list out to see the full morse code message that must be sent. Note that you will need to use the .lower() function to get the lower case of capital letters.

- -
-
-
-
-
-
In [15]:
-
-
-
message = "SOS We have hit an iceberg and need help quickly"
-
- -
-
-
- -
-
-
-
In [16]:
-
-
-
morse = []
-for letter in message:
-    morse.append( letter_to_morse[letter.lower()] )
-print(morse)
-
- -
-
-
- -
-
- - -
-
- -
-
['...', '---', '...', '/', '.--', '.', '/', '....', '.-', '...-', '.', '/', '....', '..', '-', '/', '.-', '-.', '/', '..', '-.-.', '.', '-...', '.', '.-.', '--.', '/', '.-', '-.', '-..', '/', '-.', '.', '.', '-..', '/', '....', '.', '.-..', '.--.', '/', '--.-', '..-', '..', '-.-.', '-.-', '.-..', '-.--']
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 3

The inverted form of a dictionary is one where the keys are now looked up by value. For example, the below code inverts letter_to_morse such that the morse code is the key, and the letter is the value.

- -
-
-
-
-
-
In [17]:
-
-
-
morse_to_letter = {}
-for letter in letter_to_morse.keys():
-    morse_to_letter[ letter_to_morse[letter] ] = letter
-
- -
-
-
- -
-
-
-
-
-
-

Check that this code works by verifying that morse_to_letter["..."] equals "s".

-

Next, loop through the morse code message you created in exercise 2 and see if you can convert it back to english. Note that you can join a list of letters together into a string using the code "".join(letters).

- -
-
-
-
-
-
In [18]:
-
-
-
english = []
-for code in morse:
-    english.append( morse_to_letter[code] )
-print("".join(english))
-
- -
-
-
- -
-
- - -
-
- -
-
sos we have hit an iceberg and need help quickly
-
-
-
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/04_functions.html b/html/answers/04_functions.html deleted file mode 100644 index 80fe956..0000000 --- a/html/answers/04_functions.html +++ /dev/null @@ -1,12607 +0,0 @@ - - - -04_functions - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Functions

Functions provide a way to package often-used code into reusable and easy to use components. For example, here is some code that multiplies together two lists

- -
-
-
-
-
-
In [1]:
-
-
-
list1 = [2, 4, 6, 8]
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
list2 = [10, 20, 30, 40]
-
- -
-
-
- -
-
-
-
In [3]:
-
-
-
list3 = []
-
- -
-
-
- -
-
-
-
In [4]:
-
-
-
for x, y in zip(list1,list2):
-    list3.append(x * y)
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
list3
-
- -
-
-
- -
-
- - -
-
Out[5]:
- - - -
-
[20, 80, 180, 320]
-
- -
- -
-
- -
-
-
-
-
-
-

We don't want to keep typing out the above code every time we want to multiply the numbers in two lists. Instead, we can collect that code together into a function

- -
-
-
-
-
-
In [6]:
-
-
-
def multiply(a, b):
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
-
-
-

We can now call this function directly on our lists, e.g.

- -
-
-
-
-
-
In [7]:
-
-
-
list3 = multiply(list1, list2)
-
- -
-
-
- -
-
-
-
In [8]:
-
-
-
list3
-
- -
-
-
- -
-
- - -
-
Out[8]:
- - - -
-
[20, 80, 180, 320]
-
- -
- -
-
- -
-
-
-
-
-
-

The function is called using its name, and passing in the values as two arguments, e.g.

- -
-
-
-
-
-
In [9]:
-
-
-
list4 = multiply( [1,2,3], [4,5,6] )
-
- -
-
-
- -
-
-
-
In [10]:
-
-
-
list4
-
- -
-
-
- -
-
- - -
-
Out[10]:
- - - -
-
[4, 10, 18]
-
- -
- -
-
- -
-
-
-
-
-
-

The arguments are placed inside the round brackets. These are copied, in order, to the function. For example, [1,2,3] is copied into a, and [4,5,6] is copied as b. The code in the function is then executed. We can watch this code being run by adding in print statements, e.g.

-
def multiply(a, b):
-    print("a = %s" % a)
-    print("b = %s" % b)
-    c = []
-    for x,y in zip(a,b):
-        print("%s times %s equals %s" % (x,y,x*y))
-        c.append(x*y)
-    print("c = %s" % c)
-    return c
-
- -
-
-
-
-
-
-
-
-

You must pass the right number of arguments into a function. For example, this is what happens if you get the number of arguments wrong...

- -
-
-
-
-
-
In [11]:
-
-
-
list5 = multiply(list1)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-TypeError                                 Traceback (most recent call last)
-<ipython-input-11-2ad4f6a161c6> in <module>()
-----> 1 list5 = multiply(list1)
-
-TypeError: multiply() missing 1 required positional argument: 'b'
-
-
- -
-
- -
-
-
-
-
-
-

You can write functions that take as many (or as few) arguments as you want. For example, here is a function that takes no arguments, and then a function that takes lots

- -
-
-
-
-
-
In [12]:
-
-
-
def func0():
-    return "no arguments to this function"
-
- -
-
-
- -
-
-
-
In [13]:
-
-
-
def func1(a, b, c, d, e=5):
-    return a+b+c+d+e
-
- -
-
-
- -
-
-
-
In [14]:
-
-
-
func0()
-
- -
-
-
- -
-
- - -
-
Out[14]:
- - - -
-
'no arguments to this function'
-
- -
- -
-
- -
-
-
-
In [15]:
-
-
-
func1(1, 2, 3, 4, 5)
-
- -
-
-
- -
-
- - -
-
Out[15]:
- - - -
-
15
-
- -
- -
-
- -
-
-
-
In [16]:
-
-
-
func1(1, 2, 3, 4)
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
15
-
- -
- -
-
- -
-
-
-
-
-
-

Note that with the last function we have set a default value of the argument e. This is given the value of 5 if it is not specified. This allows us to pass 4 arguments instead of 5. Changing the default value by editing the definition of the function above will thus change the output of func1 when it is called with only four arguments.

- -
-
-
-
-
-
-
-
-

Exercise

Here is the morse code dictionary from the last session, together with the code that converts a message from english into morse code.

- -
-
-
-
-
-
In [17]:
-
-
-
letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
- -
-
-
- -
-
-
-
In [18]:
-
-
-
message = "SOS We have hit an iceberg and need help quickly"
-
- -
-
-
- -
-
-
-
In [19]:
-
-
-
morse = []
-for letter in message:
-    morse.append( letter_to_morse[letter.lower()] )
-print(morse)
-
- -
-
-
- -
-
- - -
-
- -
-
['...', '---', '...', '/', '.--', '.', '/', '....', '.-', '...-', '.', '/', '....', '..', '-', '/', '.-', '-.', '/', '..', '-.-.', '.', '-...', '.', '.-.', '--.', '/', '.-', '-.', '-..', '/', '-.', '.', '.', '-..', '/', '....', '.', '.-..', '.--.', '/', '--.-', '..-', '..', '-.-.', '-.-', '.-..', '-.--']
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 1

Create a function called encode that takes a message and returns the morse code equivalent. Test this function by encoding the message SOS We have hit an iceberg and need help quickly and check that you get the same result as in the last session. Now try using your function to encode other messages.

- -
-
-
-
-
-
In [20]:
-
-
-
def encode(message):
-    morse = []
-    for letter in message:
-        morse.append( letter_to_morse[letter.lower()] )
-    return morse
-
- -
-
-
- -
-
-
-
In [21]:
-
-
-
encode(message) == morse
-
- -
-
-
- -
-
- - -
-
Out[21]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [22]:
-
-
-
encode("Hello World")
-
- -
-
-
- -
-
- - -
-
Out[22]:
- - - -
-
['....', '.', '.-..', '.-..', '---', '/', '.--', '---', '.-.', '.-..', '-..']
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

Using the answer from Exercise 2 in the dictionaries lesson, write a function called decode that converts a morse code message back to english. Check that you can decode the above morse code message back to english.

- -
-
-
-
-
-
In [23]:
-
-
-
morse_to_letter = {}
-for letter in letter_to_morse.keys():
-    morse_to_letter[ letter_to_morse[letter] ] = letter    
-
- -
-
-
- -
-
-
-
In [24]:
-
-
-
def decode(morse):
-    english = []
-    for code in morse:
-        english.append( morse_to_letter[code] )
-    return "".join(english)
-
- -
-
-
- -
-
-
-
In [25]:
-
-
-
decode(morse)
-
- -
-
-
- -
-
- - -
-
Out[25]:
- - - -
-
'sos we have hit an iceberg and need help quickly'
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 3

Below is a list of messages. Loop over the messages and check that decode( encode(message) ) equals the original message. Do any of the messages fail to encode and decode correctly? If so, why? How can your check be modified to account for the limitations of your encode and decode functions?

- -
-
-
-
-
-
In [26]:
-
-
-
messages = [ "hello world", "this is a long message", "Oh no this may break", "This message is difficult to encode." ]
-
- -
-
-
- -
-
-
-
In [27]:
-
-
-
for message in messages:
-    print("checking for message '%s'..." % message)
-    print( message == decode( encode(message) ) )
-
- -
-
-
- -
-
- - -
-
- -
-
checking for message 'hello world'...
-True
-checking for message 'this is a long message'...
-True
-checking for message 'Oh no this may break'...
-False
-checking for message 'This message is difficult to encode.'...
-
-
-
- -
-
- -
-
----------------------------------------------------------------------------
-KeyError                                  Traceback (most recent call last)
-<ipython-input-27-58f3e4df6a62> in <module>()
-      1 for message in messages:
-      2     print("checking for message '%s'..." % message)
-----> 3     print( message == decode( encode(message) ) )
-
-<ipython-input-20-39fdf9cb0c17> in encode(message)
-      2     morse = []
-      3     for letter in message:
-----> 4         morse.append( letter_to_morse[letter.lower()] )
-      5     return morse
-
-KeyError: '.'
-
-
- -
-
- -
-
-
-
In [28]:
-
-
-
for message in messages:
-    print("checking for message '%s'..." % message)
-    print( message.lower() == decode(encode(message)) )
-
- -
-
-
- -
-
- - -
-
- -
-
checking for message 'hello world'...
-True
-checking for message 'this is a long message'...
-True
-checking for message 'Oh no this may break'...
-True
-checking for message 'This message is difficult to encode.'...
-
-
-
- -
-
- -
-
----------------------------------------------------------------------------
-KeyError                                  Traceback (most recent call last)
-<ipython-input-28-d00d649e627e> in <module>()
-      1 for message in messages:
-      2     print("checking for message '%s'..." % message)
-----> 3     print( message.lower() == decode(encode(message)) )
-
-<ipython-input-20-39fdf9cb0c17> in encode(message)
-      2     morse = []
-      3     for letter in message:
-----> 4         morse.append( letter_to_morse[letter.lower()] )
-      5     return morse
-
-KeyError: '.'
-
-
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/05_objects.html b/html/answers/05_objects.html deleted file mode 100644 index 7ff5fda..0000000 --- a/html/answers/05_objects.html +++ /dev/null @@ -1,12655 +0,0 @@ - - - -05_objects - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Objects

In the last session you learned how to package up useful code into functions. This is a really useful idea, as it lets you re-use useful code in your own scripts, and to then share useful code with other people.

-

However, it is normal for functions to rely on data. For example, consider the Morse code encode and decode functions in the last lesson. These only work because of the data contained in the letter_to_morse dictionary. The functions would break if anyone changes the data in this dictionary.

- -
-
-
-
-
-
In [1]:
-
-
-
letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
def encode(message):
-    morse = []
-    for letter in message:
-        morse.append( letter_to_morse[letter.lower()] )
-    return morse
-
- -
-
-
- -
-
-
-
In [3]:
-
-
-
encode("Hello")
-
- -
-
-
- -
-
- - -
-
Out[3]:
- - - -
-
['....', '.', '.-..', '.-..', '---']
-
- -
- -
-
- -
-
-
-
-
-
-

The above encode("Hello") has worked. However, if we change the data in letter_to_morse, e.g. swapping l from .-.. to -.--, then we get ['....', '.', '-.--', '-.--', '---'], which is wrong. We can make even larger changes, which would completely break the function...

- -
-
-
-
-
-
-
-
-

While such changes are easy to spot in this example, they become more difficult to find in larger programs. In addition, as you share code, you will find that people using your code will do weird things to the data on which it depends, which can introduce weird bugs and problems.

-

The solution is to package a function together with the data on which it depends into a single object. This idea is the foundation of object orientated programming. To explore this, let us start with a simple example that packages the encode function together with the letter_to_morse dictionary on which it depends.

- -
-
-
-
-
-
In [4]:
-
-
-
class Morse:
-    def __init__(self):
-        self._letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-        
-    def encode(self, message):
-        morse = []
-        for letter in message:
-            morse.append( self._letter_to_morse[letter.lower()] )
-        return morse
-
- -
-
-
- -
-
-
-
-
-
-

Above, we have packaged the data (letter_to_morse) together with the encode function into what we call a class. A Class describes how data and functions are combined together. An instance of a class is called an object, which we can create by calling Morse().

- -
-
-
-
-
-
In [5]:
-
-
-
m = Morse()
-
- -
-
-
- -
-
-
-
-
-
-

m is an object of the class Morse. It has its own copy of letter_to_morse within it, and its own copy of the encode function. We can call m's copy of the encode function by typing m.encode(...), e.g.

- -
-
-
-
-
-
In [6]:
-
-
-
m.encode("Hello World")
-
- -
-
-
- -
-
- - -
-
Out[6]:
- - - -
-
['....', '.', '.-..', '.-..', '---', '/', '.--', '---', '.-.', '.-..', '-..']
-
- -
- -
-
- -
-
-
-
-
-
-

To create a new class, you use the class keyword, followed by the name of your class. In this case, class Morse defined a new class called Morse. You then add a colon, and write all of the functions that should be part of the class indented below. At a minimum, you must define one function, called the constructor. This function has the signature def __init__(self, arguments...). The first argument, self, is a special variable that allows an object of the class to access the data that belongs to itself. It is the job of the constructor to set up that data. For example, let's now create a new class that provides a simple guessing game.

- -
-
-
-
-
-
In [7]:
-
-
-
class GuessGame:
-    def __init__(self, secret):
-        self._secret = secret
-        
-    def guess(self, value):
-        if (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            print("Try again...")
-
- -
-
-
- -
-
-
-
-
-
-

In this class, the constructor __init__(self, secret) takes an extra argument after self. This argument is saved as the _secret variable that is part of the self of the object. Note that we always name variables that are part of a class with a leading underscore. We can construct different object instances of GuessGame that have different secrets, e.g.

- -
-
-
-
-
-
In [8]:
-
-
-
g1 = GuessGame("cat")
-
- -
-
-
- -
-
-
-
In [9]:
-
-
-
g2 = GuessGame("dog")
-
- -
-
-
- -
-
-
-
-
-
-

Here, the self._secret for g1 equals "cat". The self._secret for g2 equals "dog".

-

When we call the function g1.guess(value), it compares value against self._secret for g1.

- -
-
-
-
-
-
In [10]:
-
-
-
g1.guess("dog")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [11]:
-
-
-
g1.guess("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
Well done - you have guessed my secret
-
-
-
- -
-
- -
-
-
-
-
-
-

When we call the function g2.guess(value) it compares value against self._secret for g2.

- -
-
-
-
-
-
In [12]:
-
-
-
g2.guess("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [13]:
-
-
-
g2.guess("dog")
-
- -
-
-
- -
-
- - -
-
- -
-
Well done - you have guessed my secret
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Edit the below GuessGame example so that it records how many unsuccessful guesses have been performed. Add a function called nGuesses() that returns the number of unsuccessful guesses. Once you have made the changes, check your class by creating an object of your class and using it to make some successful and unsuccessful guesses.

- -
-
-
-
-
-
In [14]:
-
-
-
class GuessGame:
-    def __init__(self, secret):
-        self._secret = secret
-        self._nguesses = 0
-        
-    def guess(self, value):
-        if (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            self._nguesses += 1
-            print("Try again...")
-            
-    def nGuesses(self):
-        return self._nguesses
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
g = GuessGame("cat")
-
- -
-
-
- -
-
-
-
In [16]:
-
-
-
g.nGuesses() == 0
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [17]:
-
-
-
g.guess("dog")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [18]:
-
-
-
g.nGuesses() == 1
-
- -
-
-
- -
-
- - -
-
Out[18]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [19]:
-
-
-
g.guess("horse")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [20]:
-
-
-
g.nGuesses() == 2
-
- -
-
-
- -
-
- - -
-
Out[20]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [21]:
-
-
-
g.guess("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
Well done - you have guessed my secret
-
-
-
- -
-
- -
-
-
-
In [22]:
-
-
-
g.nGuesses() == 2
-
- -
-
-
- -
-
- - -
-
Out[22]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

Edit the constructor of your GuessGame class so that the user can optionally specify a maximum number of allowable guesses. If the maximum number of guesses is not supplied, then set the default value to 5.

-

Create a maxGuesses() function that returns the maximum number of allowable guesses.

-

Finally, edit the guess() function so that it will not let you make more than the maximum number of guesses (e.g. if the number of guesses exceeds the maximum number, then print out "Sorry, you have run out of guesses.").

-

Check that your code works by creating an object of GuessGame that only allows three guesses, and see what happens if you guess incorrectly more than three times.

- -
-
-
-
-
-
In [23]:
-
-
-
class GuessGame:
-    def __init__(self, secret, max_guesses=5):
-        self._secret = secret
-        self._nguesses = 0
-        self._max_guesses = max_guesses
-    
-    def guess(self, value):
-        if (self.nGuesses() >= self.maxGuesses()):
-            print("Sorry, you have run out of guesses")
-        elif (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            self._nguesses += 1
-            print("Try again...")
-    
-    def nGuesses(self):
-        return self._nguesses
-    
-    def maxGuesses(self):
-        return self._max_guesses
-
- -
-
-
- -
-
-
-
In [24]:
-
-
-
g = GuessGame("fish", 3)
-
- -
-
-
- -
-
-
-
In [25]:
-
-
-
g.maxGuesses() == 3
-
- -
-
-
- -
-
- - -
-
Out[25]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [26]:
-
-
-
g.guess("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [27]:
-
-
-
g.guess("dog")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [28]:
-
-
-
g.guess("horse")
-
- -
-
-
- -
-
- - -
-
- -
-
Try again...
-
-
-
- -
-
- -
-
-
-
In [29]:
-
-
-
g.guess("gerbil")
-
- -
-
-
- -
-
- - -
-
- -
-
Sorry, you have run out of guesses
-
-
-
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/06_classes.html b/html/answers/06_classes.html deleted file mode 100644 index c341282..0000000 --- a/html/answers/06_classes.html +++ /dev/null @@ -1,12784 +0,0 @@ - - - -06_classes - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Classes

Classes allow you to define how to package data with functions to create objects. An object is an instance of a class, which contains its own data, and its own copy of functions that can operate on that data.

-

You use classes to define objects that represent the concepts and things that your program will work with. For example, if your program managed exam results of students, then you may create one class that represents an Exam, and another that represents a Student.

- -
-
-
-
-
-
In [1]:
-
-
-
class Exam:
-    def __init__(self, max_score=100):
-        self._max_score = max_score
-        self._actual_score = 0
-        
-    def percent(self):
-        return 100.0 * self._actual_score / self._max_score
-    
-    def setResult(self, score):
-        if (score < 0):
-            self._actual_score = 0
-        elif (score > self._max_score):
-            self._actual_score = self._max_score
-        else:
-            self._actual_score = score
-    
-    def grade(self):
-        if (self._actual_score == 0):
-            return "U"
-        elif (self.percent() > 90.0):
-            return "A"
-        elif (self.percent() > 80.0):
-            return "B"
-        elif (self.percent() > 70.0):
-            return "C"
-        else:
-            return "F"
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
class Student:
-    def __init__(self):
-        self._exams = {}
-    
-    def addExam(self, name, exam):
-        self._exams[name] = exam
-        
-    def addResult(self, name, score):
-        self._exams[name].setResult(score)
-    
-    def result(self, exam):
-        return self._exams[exam].percent()
-    
-    def grade(self, exam):
-        return self._exams[exam].grade()
-    
-    def grades(self):
-        g = {}
-        for exam in self._exams.keys():
-            g[exam] = self.grade(exam)
-        return g
-
- -
-
-
- -
-
-
-
-
-
-

We can now create a student, and give them a set of exams that they need to complete.

- -
-
-
-
-
-
In [3]:
-
-
-
s = Student()
-
- -
-
-
- -
-
-
-
In [4]:
-
-
-
s.addExam( "maths", Exam(20) )
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
s.addExam( "chemistry", Exam(75) )
-
- -
-
-
- -
-
-
-
-
-
-

At this point, the student has not completed any exams, so the grades are all 'U'

- -
-
-
-
-
-
In [6]:
-
-
-
s.grades()
-
- -
-
-
- -
-
- - -
-
Out[6]:
- - - -
-
{'chemistry': 'U', 'maths': 'U'}
-
- -
- -
-
- -
-
-
-
-
-
-

However, we can now add the results...

- -
-
-
-
-
-
In [7]:
-
-
-
s.addResult("maths", 15)
-
- -
-
-
- -
-
-
-
In [8]:
-
-
-
s.addResult("chemistry", 62)
-
- -
-
-
- -
-
-
-
In [9]:
-
-
-
s.grades()
-
- -
-
-
- -
-
- - -
-
Out[9]:
- - - -
-
{'chemistry': 'B', 'maths': 'C'}
-
- -
- -
-
- -
-
-
-
-
-
-

Programming with classes makes the code easier to read, as the code more closely represents the concepts that make up the program. For example, here we have a class that represents a full school of students.

- -
-
-
-
-
-
In [10]:
-
-
-
class School:
-    def __init__(self):
-        self._students = {}
-        self._exams = []
-
-    def addStudent(self, name):
-        self._students[name] = Student()
-
-    def addExam(self, exam, max_score):
-        self._exams.append(exam)
-        
-        for key in self._students.keys():
-            self._students[key].addExam(exam, Exam(max_score))
-    
-    def addResult(self, name, exam, score):
-        self._students[name].addResult(exam, score)
-        
-    def grades(self):
-        g = {}
-        for name in self._students.keys():
-            g[name] = self._students[name].grades()
-        return g
-
- -
-
-
- -
-
-
-
-
-
-

We can now create a whole school of students and manage the exams and results for all of them with some reasonably readable code :-)

- -
-
-
-
-
-
In [11]:
-
-
-
school = School()
-
- -
-
-
- -
-
-
-
In [12]:
-
-
-
school.addStudent("Charlie")
-
- -
-
-
- -
-
-
-
In [13]:
-
-
-
school.addStudent("Matt")
-
- -
-
-
- -
-
-
-
In [14]:
-
-
-
school.addStudent("James")
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
school.addExam( "maths", 20 )
-
- -
-
-
- -
-
-
-
In [16]:
-
-
-
school.addExam( "physics", 50 )
-
- -
-
-
- -
-
-
-
In [17]:
-
-
-
school.addExam( "english literature", 30 )
-
- -
-
-
- -
-
-
-
In [18]:
-
-
-
school.grades()
-
- -
-
-
- -
-
- - -
-
Out[18]:
- - - -
-
{'Charlie': {'english literature': 'U', 'maths': 'U', 'physics': 'U'},
- 'James': {'english literature': 'U', 'maths': 'U', 'physics': 'U'},
- 'Matt': {'english literature': 'U', 'maths': 'U', 'physics': 'U'}}
-
- -
- -
-
- -
-
-
-
-
-
-

We can now add in the results of the exams, which have been returned to us by the exam markers...

- -
-
-
-
-
-
In [19]:
-
-
-
englit_results = { "Charlie" : 10, "Matt" : 25, "James" : 3 }
-
- -
-
-
- -
-
-
-
In [20]:
-
-
-
phys_results = { "Matt" : 48, "James" : 3 }
-
- -
-
-
- -
-
-
-
In [21]:
-
-
-
maths_results = { "James" : 20, "Matt" : 18, "Charlie" : 4 }
-
- -
-
-
- -
-
-
-
-
-
-

Indeed, we will do this by using a function...

- -
-
-
-
-
-
In [22]:
-
-
-
def add_results(school, exam, results):
-    for student in results.keys():
-        school.addResult(student, exam, results[student])
-
- -
-
-
- -
-
-
-
In [23]:
-
-
-
add_results(school, "english literature", englit_results)
-
- -
-
-
- -
-
-
-
In [24]:
-
-
-
add_results(school, "physics", phys_results)
-
- -
-
-
- -
-
-
-
In [25]:
-
-
-
add_results(school, "maths", maths_results)
-
- -
-
-
- -
-
-
-
In [26]:
-
-
-
school.grades()
-
- -
-
-
- -
-
- - -
-
Out[26]:
- - - -
-
{'Charlie': {'english literature': 'F', 'maths': 'F', 'physics': 'U'},
- 'James': {'english literature': 'F', 'maths': 'A', 'physics': 'F'},
- 'Matt': {'english literature': 'B', 'maths': 'B', 'physics': 'A'}}
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Here is a copy of the Morse class from the last section. Modify this class to add in a decode function that converts Morse code back to english. Check that this class works by seeing if m.decode( m.encode(message) ) == message.lower().

- -
-
-
-
-
-
In [27]:
-
-
-
class Morse:
-    def __init__(self):
-        self._letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-        
-        self._morse_to_letter = {}
-        for letter in self._letter_to_morse.keys():
-            self._morse_to_letter[ self._letter_to_morse[letter] ] = letter
-        
-    def encode(self, message):
-        morse = []
-        for letter in message:
-            morse.append( self._letter_to_morse[letter.lower()] )
-        return morse
-    
-    def decode(self, morse):
-        message = []
-        for code in morse:
-            message.append( self._morse_to_letter[code] )
-        return "".join(message)
-
- -
-
-
- -
-
-
-
In [28]:
-
-
-
m = Morse()
-
- -
-
-
- -
-
-
-
In [29]:
-
-
-
message = "Hello World"
-
- -
-
-
- -
-
-
-
In [30]:
-
-
-
m.decode( m.encode(message) ) == message.lower()
-
- -
-
-
- -
-
- - -
-
Out[30]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

Below is a copy of the School class, together with a copy of the code needed to populate an object of that class with students and exam results. Edit the School class to add in the following functions:

-
    -
  • .resits() : this should return the list of exams that each student should resit if they get an "F" or "U" grade.
  • -
  • .prizeStudent() : this should return the name of the student who scored the highest average percent across all of the exams.
  • -
  • .reviseCourse(threshold) : this should return the name of the exam that gets the lowest average score across all students, if the average score is below threshold.
  • -
-

Use these functions to find out which students need to resit which exams, which student should be awarded the annual school prize, and which courses should be revised as the average mark is less than 50%.

- -
-
-
-
-
-
In [31]:
-
-
-
class School:
-    def __init__(self):
-        self._students = {}
-        self._exams = []
-
-    def addStudent(self, name):
-        self._students[name] = Student()
-
-    def addExam(self, exam, max_score):
-        self._exams.append(exam)
-        
-        for key in self._students.keys():
-            self._students[key].addExam(exam, Exam(max_score))
-    
-    def addResult(self, name, exam, score):
-        self._students[name].addResult(exam, score)
-        
-    def grades(self):
-        g = {}
-        for name in self._students.keys():
-            g[name] = self._students[name].grades()
-        return g
-    
-    def resits(self):
-        r = {}
-        for name in self._students.keys():
-            student_resits = []
-            
-            for exam in self._exams:
-                grade = self._students[name].grade(exam)
-                
-                if (grade == "F" or grade == "U"):
-                    student_resits.append(exam)
-                    
-                if len(student_resits) > 0:
-                    r[name] = student_resits
-        
-        return r
-    
-    def prizeStudent(self):
-        prize_score = 0
-        prize_student = None
-        
-        for name in self._students.keys():
-            avg_score = 0
-            for exam in self._exams:
-                avg_score += self._students[name].result(exam)
-            
-            avg_score /= len(self._exams)
-            
-            if avg_score > prize_score:
-                prize_score = avg_score
-                prize_student = name
-                
-        return prize_student
-    
-    def reviseCourse(self, threshold=50):
-        revise_course = {}
-        
-        for exam in self._exams:
-            avg_score = 0
-            for name in self._students.keys():
-                avg_score += self._students[name].result(exam)
-            avg_score /= len(self._students)
-            
-            if avg_score < threshold:
-                revise_course[exam] = avg_score
-                
-        return revise_course
-
- -
-
-
- -
-
-
-
In [32]:
-
-
-
students = ["Charlie", "James", "Matt"]
-
- -
-
-
- -
-
-
-
In [33]:
-
-
-
exams = { "maths" : 20, "physics" : 50, "english literature" : 30 }
-
- -
-
-
- -
-
-
-
In [34]:
-
-
-
results = { "maths" : { "James" : 20, "Matt" : 18, "Charlie" : 4 }, 
-            "physics" : { "Matt" : 48, "James" : 3 },
-            "english literature" : { "Charlie" : 10, "Matt" : 25, "James" : 3 } }
-
- -
-
-
- -
-
-
-
In [35]:
-
-
-
school = School()
-
- -
-
-
- -
-
-
-
In [36]:
-
-
-
for student in students:
-    school.addStudent(student)
-
- -
-
-
- -
-
-
-
In [37]:
-
-
-
for exam in exams.keys():
-    school.addExam(exam, exams[exam])
-
- -
-
-
- -
-
-
-
In [38]:
-
-
-
for exam in results:
-    add_results(school, exam, results[exam])
-
- -
-
-
- -
-
-
-
In [39]:
-
-
-
school.grades()
-
- -
-
-
- -
-
- - -
-
Out[39]:
- - - -
-
{'Charlie': {'english literature': 'F', 'maths': 'F', 'physics': 'U'},
- 'James': {'english literature': 'F', 'maths': 'A', 'physics': 'F'},
- 'Matt': {'english literature': 'B', 'maths': 'B', 'physics': 'A'}}
-
- -
- -
-
- -
-
-
-
In [40]:
-
-
-
school.resits()
-
- -
-
-
- -
-
- - -
-
Out[40]:
- - - -
-
{'Charlie': ['physics', 'maths', 'english literature'],
- 'James': ['physics', 'english literature']}
-
- -
- -
-
- -
-
-
-
In [41]:
-
-
-
school.prizeStudent()
-
- -
-
-
- -
-
- - -
-
Out[41]:
- - - -
-
'Matt'
-
- -
- -
-
- -
-
-
-
In [42]:
-
-
-
school.reviseCourse(50)
-
- -
-
-
- -
-
- - -
-
Out[42]:
- - - -
-
{'english literature': 42.22222222222222, 'physics': 34.0}
-
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/07_documentation.html b/html/answers/07_documentation.html deleted file mode 100644 index 83e1322..0000000 --- a/html/answers/07_documentation.html +++ /dev/null @@ -1,12542 +0,0 @@ - - - -07_documentation - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Documentation

Python has great in-built documentation that is available via the help function. For example

- -
-
-
-
-
-
In [1]:
-
-
-
l = ["cat", "dog", "fish"]
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
help(l)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on list object:
-
-class list(object)
- |  list() -> new empty list
- |  list(iterable) -> new list initialized from iterable's items
- |  
- |  Methods defined here:
- |  
- |  __add__(self, value, /)
- |      Return self+value.
- |  
- |  __contains__(self, key, /)
- |      Return key in self.
- |  
- |  __delitem__(self, key, /)
- |      Delete self[key].
- |  
- |  __eq__(self, value, /)
- |      Return self==value.
- |  
- |  __ge__(self, value, /)
- |      Return self>=value.
- |  
- |  __getattribute__(self, name, /)
- |      Return getattr(self, name).
- |  
- |  __getitem__(...)
- |      x.__getitem__(y) <==> x[y]
- |  
- |  __gt__(self, value, /)
- |      Return self>value.
- |  
- |  __iadd__(self, value, /)
- |      Implement self+=value.
- |  
- |  __imul__(self, value, /)
- |      Implement self*=value.
- |  
- |  __init__(self, /, *args, **kwargs)
- |      Initialize self.  See help(type(self)) for accurate signature.
- |  
- |  __iter__(self, /)
- |      Implement iter(self).
- |  
- |  __le__(self, value, /)
- |      Return self<=value.
- |  
- |  __len__(self, /)
- |      Return len(self).
- |  
- |  __lt__(self, value, /)
- |      Return self<value.
- |  
- |  __mul__(self, value, /)
- |      Return self*value.n
- |  
- |  __ne__(self, value, /)
- |      Return self!=value.
- |  
- |  __new__(*args, **kwargs) from builtins.type
- |      Create and return a new object.  See help(type) for accurate signature.
- |  
- |  __repr__(self, /)
- |      Return repr(self).
- |  
- |  __reversed__(...)
- |      L.__reversed__() -- return a reverse iterator over the list
- |  
- |  __rmul__(self, value, /)
- |      Return self*value.
- |  
- |  __setitem__(self, key, value, /)
- |      Set self[key] to value.
- |  
- |  __sizeof__(...)
- |      L.__sizeof__() -- size of L in memory, in bytes
- |  
- |  append(...)
- |      L.append(object) -> None -- append object to end
- |  
- |  clear(...)
- |      L.clear() -> None -- remove all items from L
- |  
- |  copy(...)
- |      L.copy() -> list -- a shallow copy of L
- |  
- |  count(...)
- |      L.count(value) -> integer -- return number of occurrences of value
- |  
- |  extend(...)
- |      L.extend(iterable) -> None -- extend list by appending elements from the iterable
- |  
- |  index(...)
- |      L.index(value, [start, [stop]]) -> integer -- return first index of value.
- |      Raises ValueError if the value is not present.
- |  
- |  insert(...)
- |      L.insert(index, object) -- insert object before index
- |  
- |  pop(...)
- |      L.pop([index]) -> item -- remove and return item at index (default last).
- |      Raises IndexError if list is empty or index is out of range.
- |  
- |  remove(...)
- |      L.remove(value) -> None -- remove first occurrence of value.
- |      Raises ValueError if the value is not present.
- |  
- |  reverse(...)
- |      L.reverse() -- reverse *IN PLACE*
- |  
- |  sort(...)
- |      L.sort(key=None, reverse=False) -> None -- stable sort *IN PLACE*
- |  
- |  ----------------------------------------------------------------------
- |  Data and other attributes defined here:
- |  
- |  __hash__ = None
-
-
-
-
- -
-
- -
-
-
-
-
-
-

You can add similar documentation to the functions that you write. You do this by adding in a documentation string as the first string after defining the function e.g.

- -
-
-
-
-
-
In [3]:
-
-
-
def multiply(a, b):
-    """This function returns the element-wise multiplication of the passed lists 'a' and 'b'"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
In [4]:
-
-
-
multiply( [1,2,3], [4,5,6] )
-
- -
-
-
- -
-
- - -
-
Out[4]:
- - - -
-
[4, 10, 18]
-
- -
- -
-
- -
-
-
-
In [5]:
-
-
-
help(multiply)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function multiply in module __main__:
-
-multiply(a, b)
-    This function returns the element-wise multiplication of the passed lists 'a' and 'b'
-
-
-
-
- -
-
- -
-
-
-
-
-
-

The documentation string should be placed between two sets of triple quotes ("""). This is a convention that makes it easier to expand the documentation later, and that ensures that nothing you write in the documentation will be expanded or interpreted as Python.

-

Documentation should provide an easy to understand, and brief description of what the function does. It should not give information that is obvious by reading the function signature. For example, this is a bad piece of documentation.

- -
-
-
-
-
-
In [6]:
-
-
-
def multiply(a, b):
-    """function multiply(a,b) -> list"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
In [7]:
-
-
-
help(multiply)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function multiply in module __main__:
-
-multiply(a, b)
-    function multiply(a,b) -> list
-
-
-
-
- -
-
- -
-
-
-
-
-
-

It is much better to say what the function does, and then what it returns (as this can't be seen from the signature). Good documentation would be

- -
-
-
-
-
-
In [8]:
-
-
-
def multiply(a, b):
-    """Calculates the element-wise multiplication of a and b, returning a list of the results"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x*y)
-    return c
-
- -
-
-
- -
-
-
-
In [9]:
-
-
-
help(multiply)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function multiply in module __main__:
-
-multiply(a, b)
-    Calculates the element-wise multiplication of a and b, returning a list of the results
-
-
-
-
- -
-
- -
-
-
-
-
-
-

Your documentation can span over multiple lines. If you are describing the arguments, then you should use one line per argument, for example

- -
-
-
-
-
-
In [10]:
-
-
-
def make_complex(real, imag=0):
-    """Create and return a complex number
-    
-       Keyword arguments:
-       
-       real -- the real part of the number
-       imag -- the imaginary part of the number
-    """
-    return (real,imag)
-
- -
-
-
- -
-
-
-
In [11]:
-
-
-
help(make_complex)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function make_complex in module __main__:
-
-make_complex(real, imag=0)
-    Create and return a complex number
-    
-    Keyword arguments:
-    
-    real -- the real part of the number
-    imag -- the imaginary part of the number
-
-
-
-
- -
-
- -
-
-
-
-
-
-

By convention, you will notice above that the last """ is placed on its own line if the documentation spans multiple lines. It is on the same line if the documentation is short.

-

In general, keep your documentation short, to the point, and avoid repeating obvious information. However, be precise, as this may be the only part of your code that somebody else reads before they use your function in their program.

-

A good suggestion is to look at documentation you like and try to copy that style. Also, look for code that you think is poorly documented, and try to avoid their mistakes.

- -
-
-
-
-
-
-
-
-

Exercise

Below is a series of undocumented functions. Take a look through the functions and try to work out what they do. Once you understand the functions, write some documentation for each function. Get your neighbour to read your documentation. Do they understand what the function does based on what you have written? Do the function names, combined with your documentation, accurately convey the result of calling the function?

-

Note that you may have to use help(...) yourself if you don't recognise some of the code in the functions. Also try to play with the function to see how it behaves.

- -
-
-
-
-
-
In [12]:
-
-
-
def add(a, b):
-    """Calculates the element-wise sum of a and b, returning a list of the results"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x+y)
-    return c
-
- -
-
-
- -
-
-
-
In [13]:
-
-
-
help(add)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function add in module __main__:
-
-add(a, b)
-    Calculates the element-wise sum of a and b, returning a list of the results
-
-
-
-
- -
-
- -
-
-
-
In [14]:
-
-
-
def subtract(a, b):
-    """Calculates the element-wise ratio of a and b, returning a list of results. This function
-       is badly named, as it DOES NOT return the element-wise difference of a and b!"""
-    c = []
-    for x,y in zip(a,b):
-        c.append(x / y)
-    return c
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
help(subtract)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function subtract in module __main__:
-
-subtract(a, b)
-    Calculates the element-wise ratio of a and b, returning a list of results. This function
-    is badly named, as it DOES NOT return the element-wise difference of a and b!
-
-
-
-
- -
-
- -
-
-
-
In [16]:
-
-
-
def capitalise(message):
-    """Capitalises every word in message, returning the result"""
-    words = message.split(" ")
-    for i in range(0,len(words)):
-        words[i] = "%s%s" % (words[i][0].upper(), words[i][1:])
-    return " ".join(words)
-
- -
-
-
- -
-
-
-
In [17]:
-
-
-
help(capitalise)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function capitalise in module __main__:
-
-capitalise(message)
-    Capitalises every word in message, returning the result
-
-
-
-
- -
-
- -
-
-
-
In [18]:
-
-
-
def surprise(x):
-    """Prints 'Surprise!' if x is less than a random number between 0 and 1. Returns nothing"""
-    import random
-    if x < random.random():
-        print("Surprise!")
-
- -
-
-
- -
-
-
-
In [19]:
-
-
-
help(surprise)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function surprise in module __main__:
-
-surprise(x)
-    Prints 'Surprise!' if x is less than a random number between 0 and 1. Returns nothing
-
-
-
-
- -
-
- -
-
-
-
-
-
-

For this last function, try calling it via list_interface("ipynb").

- -
-
-
-
-
-
In [20]:
-
-
-
def list_interface(x):
-    """Returns a list of all files in the current directory that start with '0' and end with x"""
-    import glob
-    f = glob.glob("*.%s" % x)
-    l = []
-    for x in f:
-        if x.startswith("0"):
-            l.append(x)
-    return l
-
- -
-
-
- -
-
-
-
In [21]:
-
-
-
help(list_interface)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function list_interface in module __main__:
-
-list_interface(x)
-    Returns a list of all files in the current directory that start with '0' and end with x
-
-
-
-
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/08_class_documentation.html b/html/answers/08_class_documentation.html deleted file mode 100644 index 0f4907f..0000000 --- a/html/answers/08_class_documentation.html +++ /dev/null @@ -1,12741 +0,0 @@ - - - -08_class_documentation - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Documenting Classes

It is almost as easy to document a class as it is to document a function. Simply add docstrings to all of the class's functions, and also below the class name itself. For example, here is a simple documented class:

- -
-
-
-
-
-
In [1]:
-
-
-
class Demo:
-    """This class demonstrates how to document a class.
-    
-       This class is just a demonstration, and does nothing.
-       
-       However the principles of documentation are still valid!
-    """
-    
-    def __init__(self, name):
-        """You should document the constructor, saying what it expects to 
-           create a valid class. In this case
-           
-           name -- the name of an object of this class
-        """
-        self._name = name
-    
-    def getName(self):
-        """You should then document all of the member functions, just as
-           you do for normal functions. In this case, returns
-           the name of the object
-        """
-        return self._name
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
d = Demo("cat")
-
- -
-
-
- -
-
-
-
In [3]:
-
-
-
help(d)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on Demo in module __main__ object:
-
-class Demo(builtins.object)
- |  This class demonstrates how to document a class.
- |  
- |  This class is just a demonstration, and does nothing.
- |  
- |  However the principles of documentation are still valid!
- |  
- |  Methods defined here:
- |  
- |  __init__(self, name)
- |      You should document the constructor, saying what it expects to 
- |      create a valid class. In this case
- |      
- |      name -- the name of an object of this class
- |  
- |  getName(self)
- |      You should then document all of the member functions, just as
- |      you do for normal functions. In this case, returns
- |      the name of the object
- |  
- |  ----------------------------------------------------------------------
- |  Data descriptors defined here:
- |  
- |  __dict__
- |      dictionary for instance variables (if defined)
- |  
- |  __weakref__
- |      list of weak references to the object (if defined)
-
-
-
-
- -
-
- -
-
-
-
-
-
-

Often, when you write a class, you want to hide member data or member functions so that they are only visible within an object of the class. For example, above, the self._name member data should be hidden, as it should only be used by the object.

-

You control the visibility of member functions or member data using an underscore. If the member function or member data name starts with an underscore, then it is hidden. Otherwise, the member data or function is visible.

-

For example, we can hide the getName function by renaming it to _getName

- -
-
-
-
-
-
In [4]:
-
-
-
class Demo:
-    """This class demonstrates how to document a class.
-    
-       This class is just a demonstration, and does nothing.
-       
-       However the principles of documentation are still valid!
-    """
-    
-    def __init__(self, name):
-        """You should document the constructor, saying what it expects to 
-           create a valid class. In this case
-           
-           name -- the name of an object of this class
-        """
-        self._name = name
-    
-    def _getName(self):
-        """You should then document all of the member functions, just as
-           you do for normal functions. In this case, returns
-           the name of the object
-        """
-        return self._name
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
d = Demo("cat")
-
- -
-
-
- -
-
-
-
In [6]:
-
-
-
help(d)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on Demo in module __main__ object:
-
-class Demo(builtins.object)
- |  This class demonstrates how to document a class.
- |  
- |  This class is just a demonstration, and does nothing.
- |  
- |  However the principles of documentation are still valid!
- |  
- |  Methods defined here:
- |  
- |  __init__(self, name)
- |      You should document the constructor, saying what it expects to 
- |      create a valid class. In this case
- |      
- |      name -- the name of an object of this class
- |  
- |  ----------------------------------------------------------------------
- |  Data descriptors defined here:
- |  
- |  __dict__
- |      dictionary for instance variables (if defined)
- |  
- |  __weakref__
- |      list of weak references to the object (if defined)
-
-
-
-
- -
-
- -
-
-
-
-
-
-

Member functions or data that are hidden are called "private". Member functions or data that are visible are called "public". You should document all public member functions of a class, as these are visible and designed to be used by other people. It is helpful, although not required, to document all of the private member functions of a class, as these will only really be called by you. However, in years to come, you will thank yourself if you still documented them... ;-)

-

While it is possible to make member data public, it is not advised. It is much better to get and set values of member data using public member functions. This makes it easier for you to add checks to ensure that the data is consistent and being used in the right way. For example, compare these two classes that represent a person, and hold their height.

- -
-
-
-
-
-
In [7]:
-
-
-
class Person1:
-    """Class that holds a person's height"""
-    def __init__(self):
-        """Construct a person who has zero height"""
-        self.height = 0
-
- -
-
-
- -
-
-
-
In [8]:
-
-
-
class Person2:
-    """Class that holds a person's height"""
-    def __init__(self):
-        """Construct a person who has zero height"""
-        self._height = 0
-    
-    def setHeight(self, height):
-        """Set the person's height to 'height', returning whether or 
-           not the height was set successfully
-        """
-        if height < 0 or height > 300:
-            print("This is an invalid height! %s" % height)
-            return False
-        else:
-            self._height = height
-            return True
-        
-    def getHeight(self):
-        """Return the person's height"""
-        return self._height
-
- -
-
-
- -
-
-
-
-
-
-

The first example is quicker to write, but it does little to protect itself against a user who attempts to use the class badly.

- -
-
-
-
-
-
In [9]:
-
-
-
p = Person1()
-
- -
-
-
- -
-
-
-
In [10]:
-
-
-
p.height = -50
-
- -
-
-
- -
-
-
-
In [11]:
-
-
-
p.height
-
- -
-
-
- -
-
- - -
-
Out[11]:
- - - -
-
-50
-
- -
- -
-
- -
-
-
-
In [12]:
-
-
-
p.height = "cat"
-
- -
-
-
- -
-
-
-
In [13]:
-
-
-
p.height
-
- -
-
-
- -
-
- - -
-
Out[13]:
- - - -
-
'cat'
-
- -
- -
-
- -
-
-
-
-
-
-

The second example takes more lines of code, but these lines are valuable as they check that the user is using the class correctly. These checks, when combined with good documentation, ensure that your classes can be safely used by others, and that incorrect use will not create difficult-to-find bugs.

- -
-
-
-
-
-
In [14]:
-
-
-
p = Person2()
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
p.setHeight(-50)
-
- -
-
-
- -
-
- - -
-
- -
-
This is an invalid height! -50
-
-
-
- -
-
Out[15]:
- - - -
-
False
-
- -
- -
-
- -
-
-
-
In [16]:
-
-
-
p.getHeight()
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
0
-
- -
- -
-
- -
-
-
-
In [17]:
-
-
-
p.setHeight("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-TypeError                                 Traceback (most recent call last)
-<ipython-input-17-bce27b7f2122> in <module>()
-----> 1 p.setHeight("cat")
-
-<ipython-input-8-e60e8fef7814> in setHeight(self, height)
-      9            not the height was set successfully
-     10         """
----> 11         if height < 0 or height > 300:
-     12             print("This is an invalid height! %s" % height)
-     13             return False
-
-TypeError: unorderable types: str() < int()
-
-
- -
-
- -
-
-
-
In [18]:
-
-
-
p.getHeight()
-
- -
-
-
- -
-
- - -
-
Out[18]:
- - - -
-
0
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Below is the completed GuessGame class from the previous lesson. Add documentation to this class.

- -
-
-
-
-
-
In [19]:
-
-
-
class GuessGame:
-    """
-        This class provides a simple guessing game. You create an object
-        of the class with its own secret, with the aim that a user
-        then needs to try to guess what the secret is.
-    """
-    def __init__(self, secret, max_guesses=5):
-        """Create a new guess game
-        
-           secret -- the secret that must be guessed
-           max_guesses -- the maximum number of guesses allowed by the user
-        """
-        self._secret = secret
-        self._nguesses = 0
-        self._max_guesses = max_guesses
-    
-    def guess(self, value):
-        """Try to guess the secret. This will print out to the screen whether
-           or not the secret has been guessed.
-        
-           value -- the user-supplied guess
-        """
-        if (self.nGuesses() >= self.maxGuesses()):
-            print("Sorry, you have run out of guesses")
-        elif (value == self._secret):
-            print("Well done - you have guessed my secret")
-        else:
-            self._nguesses += 1
-            print("Try again...")
-    
-    def nGuesses(self):
-        """Return the number of incorrect guesses made so far"""
-        return self._nguesses
-    
-    def maxGuesses(self):
-        """Return the maximum number of incorrect guesses allowed"""
-        return self._max_guesses
-
- -
-
-
- -
-
-
-
In [20]:
-
-
-
help(GuessGame)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on class GuessGame in module __main__:
-
-class GuessGame(builtins.object)
- |  This class provides a simple guessing game. You create an object
- |  of the class with its own secret, with the aim that a user
- |  then needs to try to guess what the secret is.
- |  
- |  Methods defined here:
- |  
- |  __init__(self, secret, max_guesses=5)
- |      Create a new guess game
- |      
- |      secret -- the secret that must be guessed
- |      max_guesses -- the maximum number of guesses allowed by the user
- |  
- |  guess(self, value)
- |      Try to guess the secret. This will print out to the screen whether
- |      or not the secret has been guessed.
- |      
- |      value -- the user-supplied guess
- |  
- |  maxGuesses(self)
- |      Return the maximum number of incorrect guesses allowed
- |  
- |  nGuesses(self)
- |      Return the number of incorrect guesses made so far
- |  
- |  ----------------------------------------------------------------------
- |  Data descriptors defined here:
- |  
- |  __dict__
- |      dictionary for instance variables (if defined)
- |  
- |  __weakref__
- |      list of weak references to the object (if defined)
-
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 2

Below is a poorly-written class that uses public member data to store the name and age of a Person. Edit this class so that the member data is made private. Add get and set functions that allow you to safely get and set the name and age.

- -
-
-
-
-
-
In [21]:
-
-
-
class Person:
-    """Class the represents a Person, holding their name and age"""
-    def __init__(self, name="unknown", age=0):
-        """Construct a person with unknown name and an age of 0"""
-        self.setName(name)
-        self.setAge(age)
-        
-    def setName(self, name):
-        """Set the person's name to 'name'"""
-        self._name = str(name)  # str ensures the name is a string
-        
-    def getName(self):
-        """Return the person's name"""
-        return self._name
-    
-    def setAge(self, age):
-        """Set the person's age. This must be a number between 0 and 130"""
-        if (age < 0 or age > 130):
-            print("Cannot set the age to an invalid value: %s" % age)
-            
-        self._age = age
-        
-    def getAge(self):
-        """Return the person's age"""
-        return self._age
-
- -
-
-
- -
-
-
-
In [22]:
-
-
-
p = Person(name="Peter Parker", age=21)
-
- -
-
-
- -
-
-
-
In [23]:
-
-
-
p.getName()
-
- -
-
-
- -
-
- - -
-
Out[23]:
- - - -
-
'Peter Parker'
-
- -
- -
-
- -
-
-
-
In [24]:
-
-
-
p.getAge()
-
- -
-
-
- -
-
- - -
-
Out[24]:
- - - -
-
21
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 3

Add a private member function called _splitName to your Person class that breaks the name into a surname and first name. Add new functions called getFirstName and getSurname that use this function to return the first name and surname of the person.

- -
-
-
-
-
-
In [25]:
-
-
-
class Person:
-    """Class the represents a Person, holding their name and age"""
-    def __init__(self, name="unknown", age=0):
-        """Construct a person with unknown name and an age of 0"""
-        self.setName(name)
-        self.setAge(age)
-        
-    def setName(self, name):
-        """Set the person's name to 'name'"""
-        self._name = str(name)  # str ensures the name is a string
-        
-    def getName(self):
-        """Return the person's name"""
-        return self._name
-    
-    def setAge(self, age):
-        """Set the person's age. This must be a number between 0 and 130"""
-        if (age < 0 or age > 130):
-            print("Cannot set the age to an invalid value: %s" % age)
-            
-        self._age = age
-        
-    def getAge(self):
-        """Return the person's age"""
-        return self._age
-    
-    def _splitName(self):
-        """Private function that splits the name into parts"""
-        return self._name.split(" ")
-    
-    def getFirstName(self):
-        """Return the first name of the person"""
-        return self._splitName()[0]
-    
-    def getSurname(self):
-        """Return the surname of the person"""
-        return self._splitName()[-1]
-
- -
-
-
- -
-
-
-
In [26]:
-
-
-
p = Person(name="Peter Parker", age=21)
-
- -
-
-
- -
-
-
-
In [27]:
-
-
-
p.getFirstName()
-
- -
-
-
- -
-
- - -
-
Out[27]:
- - - -
-
'Peter'
-
- -
- -
-
- -
-
-
-
In [28]:
-
-
-
p.getSurname()
-
- -
-
-
- -
-
- - -
-
Out[28]:
- - - -
-
'Parker'
-
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/09_exceptions.html b/html/answers/09_exceptions.html deleted file mode 100644 index 7689895..0000000 --- a/html/answers/09_exceptions.html +++ /dev/null @@ -1,12485 +0,0 @@ - - - -09_exceptions - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Exceptions

Mistakes and errors happen in computer programs as much as in real life. Like life, how you handle an error in your program shows your level of professionalism, and gives others evidence that they can trust that you have written a program that will work well.

-

In the last section we indicated errors in the Person.setHeight function by printing a message to the screen and returning False to indicate that the call to setHeight had failed.

- -
-
-
-
-
-
In [1]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self):
-        """Construct a person who has zero height"""
-        self._height = 0
-    
-    def setHeight(self, height):
-        """Set the person's height to 'height', returning whether or 
-           not the height was set successfully
-        """
-        if height < 0 or height > 300:
-            print("This is an invalid height! %s" % height)
-            return False
-        else:
-            self._height = height
-            return True
-        
-    def getHeight(self):
-        """Return the person's height"""
-        return self._height
-
- -
-
-
- -
-
-
-
In [2]:
-
-
-
p = Person()
-
- -
-
-
- -
-
-
-
In [3]:
-
-
-
p.setHeight(-20)
-
- -
-
-
- -
-
- - -
-
- -
-
This is an invalid height! -20
-
-
-
- -
-
Out[3]:
- - - -
-
False
-
- -
- -
-
- -
-
-
-
-
-
-

This is not a good way of indicating an error. The issues with this are:

-
    -
  • How does the person calling setHeight know to check whether the call returns True or False?
  • -
  • What if we wanted to return something else? Should we return the error state and the value we want together?
  • -
  • If the error state is not checked, and nobody reads the error message printed to the screen, then the program is broken, as the person has been created with a height of 0.
  • -
-

The solution is to send something to the programmer that they cannot ignore, which indicates that there is an error. That something is called an "exception".

-

Take a look at this simple code that sets the height...

- -
-
-
-
-
-
In [4]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 300:
-        raise ValueError("Invalid height: %s. This should be between 0 and 300" % height)
-        
-    print("Height is set to %s" % height)
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
setHeight(-5)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-5-d79fd1d8e207> in <module>()
-----> 1 setHeight(-5)
-
-<ipython-input-4-1e09cd9970ae> in setHeight(height)
-      1 def setHeight(height):
-      2     if height < 0 or height > 300:
-----> 3         raise ValueError("Invalid height: %s. This should be between 0 and 300" % height)
-      4 
-      5     print("Height is set to %s" % height)
-
-ValueError: Invalid height: -5. This should be between 0 and 300
-
-
- -
-
- -
-
-
-
-
-
-

When we try to use an invalid value for the height, we raise (or throw) a ValueError exception. This stops the function from continuing, and gives us a very helpful print out of what went wrong, and where.

-

ValueError is just a class. The name of the class provides us with useful information (there was an error with a value in the program). You choose what error you want to raise. Python provides a set of usefully named error classes that you can use:

-
    -
  • IOError : Error raised when you have a problem with IO, e.g. opening or closing files
  • -
  • ZeroDivisionError : Error raised when you divide by zero
  • -
  • TypeError : Error raised when you are using the wrong type, e.g. maybe setting the height to a string
  • -
  • IndexError : Error raised when you are using an invalid index to access a list or other similar container
  • -
  • KeyError : Error raised when you are using an invalid key to access a dictionary or other similar container
  • -
-

A full list of standard Python exceptions is available here.

-

You are free to raise any exception class you want. It is your job as a programmer to choose the one that is most sensible, e.g.

- -
-
-
-
-
-
In [6]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 300:
-        raise ZeroDivisionError("Invalid height: %s. This should be between 0 and 300" % height)
-        
-    print("Height is set to %s" % height)
-
- -
-
-
- -
-
-
-
In [7]:
-
-
-
setHeight(400)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ZeroDivisionError                         Traceback (most recent call last)
-<ipython-input-7-2e20bc96d034> in <module>()
-----> 1 setHeight(400)
-
-<ipython-input-6-e27119f86062> in setHeight(height)
-      1 def setHeight(height):
-      2     if height < 0 or height > 300:
-----> 3         raise ZeroDivisionError("Invalid height: %s. This should be between 0 and 300" % height)
-      4 
-      5     print("Height is set to %s" % height)
-
-ZeroDivisionError: Invalid height: 400. This should be between 0 and 300
-
-
- -
-
- -
-
-
-
-
-
-

Using a ZeroDivisionError is a bad choice, as the error has nothing to do with division by zero. A ValueError is the right choice as the error relates to an invalid value passed to the function.

-

You are free to create your own exception classes.

- -
-
-
-
-
-
In [8]:
-
-
-
class InvalidHeightError(Exception):
-    pass
-
- -
-
-
- -
-
-
-
In [9]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 300:
-        raise InvalidHeightError("Invalid height: %s. This should be between 0 and 300" % height)
-        
-    print("Height is set to %s" % height)
-
- -
-
-
- -
-
-
-
In [10]:
-
-
-
setHeight(-10)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-InvalidHeightError                        Traceback (most recent call last)
-<ipython-input-10-94e46abff43e> in <module>()
-----> 1 setHeight(-10)
-
-<ipython-input-9-ced177df8f8e> in setHeight(height)
-      1 def setHeight(height):
-      2     if height < 0 or height > 300:
-----> 3         raise InvalidHeightError("Invalid height: %s. This should be between 0 and 300" % height)
-      4 
-      5     print("Height is set to %s" % height)
-
-InvalidHeightError: Invalid height: -10. This should be between 0 and 300
-
-
- -
-
- -
-
-
-
-
-
-

Your own exception classes must be declared as derived from type Exception, hence why you have to write class InvalidHeightError(Exception):. As the class doesn't need to do anything else, you can use pass to say that nothing else needs to be added. Note that you can call your error class anything you want. By convention, it is good to end the class name with Error so that other programmers know what it is for.

- -
-
-
-
-
-
-
-
-

Exercise

Here is an extended copy of the Person code from above.

- -
-
-
-
-
-
In [11]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self, height=0, weight=0):
-        """Construct a person with the specified name, height and weight"""
-        self.setHeight(height)
-        self.setWeight(weight)
-    
-    def setHeight(self, height):
-        """Set the person's height in meters"""
-        if height < 0 or height > 2.5:
-            raise ValueError("Invalid height: %s. This shoud be between 0 and 2.5 meters" % height)
-        self._height = height
-    
-    def setWeight(self, weight):
-        """Set the person's weight in kilograms"""
-        if weight < 0 or weight > 500:
-            raise ValueError("Invalid weight: %s. This should be between 0 and 500 kilograms" % weight)
-        self._weight = weight
-        
-    def getHeight(self):
-        """Return the person's height in meters"""
-        return self._height
-    
-    def getWeight(self):
-        """Return the person's weight in kilograms"""
-        return self._weight
-    
-    def bmi(self):
-        """Return the person's body mass index (bmi)"""
-        return self.getWeight() / self.getHeight()**2
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 1

Edit the above copy of Person to ensure that the .setWeight function only accepts valid weights. A valid weight is any number that is between 0 and 500 kilograms. You should raise a ValueError if the weight is outside this range. For the moment, do not worry about the user supplying a non-numeric weight.

-

Also edit the above copy of Person to ensure that the .setHeight function only accepts valid heights. A valid height is any number that is between 0 and 2.5 meters. You should raise a ValueError if the height is outside this range. For the moment, do not worry about the user supplying a non-numeric height.

-

Check that a ValueError exception is correctly raised if invalid heights or weights are supplied. Also check that the ValueError exception is not raised if a valid height and weight are supplied.

- -
-
-
-
-
-
In [12]:
-
-
-
p = Person(height=2.8, weight=500)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-12-a9b5ab422dd0> in <module>()
-----> 1 p = Person(height=2.8, weight=500)
-
-<ipython-input-11-b83565801c3e> in __init__(self, height, weight)
-      3     def __init__(self, height=0, weight=0):
-      4         """Construct a person with the specified name, height and weight"""
-----> 5         self.setHeight(height)
-      6         self.setWeight(weight)
-      7 
-
-<ipython-input-11-b83565801c3e> in setHeight(self, height)
-      9         """Set the person's height in meters"""
-     10         if height < 0 or height > 2.5:
----> 11             raise ValueError("Invalid height: %s. This shoud be between 0 and 2.5 meters" % height)
-     12         self._height = height
-     13 
-
-ValueError: Invalid height: 2.8. This shoud be between 0 and 2.5 meters
-
-
- -
-
- -
-
-
-
In [13]:
-
-
-
p = Person(height=1.8, weight=501)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-13-431e2b96b8b9> in <module>()
-----> 1 p = Person(height=1.8, weight=501)
-
-<ipython-input-11-b83565801c3e> in __init__(self, height, weight)
-      4         """Construct a person with the specified name, height and weight"""
-      5         self.setHeight(height)
-----> 6         self.setWeight(weight)
-      7 
-      8     def setHeight(self, height):
-
-<ipython-input-11-b83565801c3e> in setWeight(self, weight)
-     15         """Set the person's weight in kilograms"""
-     16         if weight < 0 or weight > 500:
----> 17             raise ValueError("Invalid weight: %s. This should be between 0 and 500 kilograms" % weight)
-     18         self._weight = weight
-     19 
-
-ValueError: Invalid weight: 501. This should be between 0 and 500 kilograms
-
-
- -
-
- -
-
-
-
In [14]:
-
-
-
p = Person(height=1.8, weight=150)
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 2

If you run the following code:

-
p = Person()
-p.bmi()
-
-

it will raise a ZeroDivisionError exception. This is because the calculation involves dividing by the height squared, which is zero in a default-constructed Person. While an exception has been raised, it is not very intuitive for another programmer to debug. A solution is to create your own named exception that provides more information.

-

Create a new exception called NullPersonError, and edit the .bmi() function so that this exception is raised if it is called on a Person whose height or weight is zero.

-

Check that the NullPersonError exception is raised if .bmi() is called on a default-constructed Person. Check that this exception is not raised if .bmi() is called on a properly constructed Person.

- -
-
-
-
-
-
In [15]:
-
-
-
class NullPersonError(Exception):
-    pass
-
- -
-
-
- -
-
-
-
In [16]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self, height=0, weight=0):
-        """Construct a person with the specified name, height and weight"""
-        self.setHeight(height)
-        self.setWeight(weight)
-    
-    def setHeight(self, height):
-        """Set the person's height in meters"""
-        if height < 0 or height > 2.5:
-            raise ValueError("Invalid height: %s. This shoud be between 0 and 2.5 meters" % height)
-        self._height = height
-    
-    def setWeight(self, weight):
-        """Set the person's weight in kilograms"""
-        if weight < 0 or weight > 500:
-            raise ValueError("Invalid weight: %s. This should be between 0 and 500 kilograms" % weight)
-        self._weight = weight
-        
-    def getHeight(self):
-        """Return the person's height in meters"""
-        return self._height
-    
-    def getWeight(self):
-        """Return the person's weight in kilograms"""
-        return self._weight
-    
-    def bmi(self):
-        """Return the person's body mass index (bmi)"""
-        if (self.getHeight() == 0 or self.getWeight() == 0):
-            raise NullPersonError("Cannot calculate the BMI of a person with zero "
-                                  "height or weight (%s,%s)" % (self.getHeight(),self.getWeight()))
-            
-        return self.getWeight() / self.getHeight()**2
-
- -
-
-
- -
-
-
-
In [17]:
-
-
-
p = Person()
-
- -
-
-
- -
-
-
-
In [18]:
-
-
-
p.bmi()
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-NullPersonError                           Traceback (most recent call last)
-<ipython-input-18-ac90f82815e5> in <module>()
-----> 1 p.bmi()
-
-<ipython-input-16-061742ffc8d5> in bmi(self)
-     30         if (self.getHeight() == 0 or self.getWeight() == 0):
-     31             raise NullPersonError("Cannot calculate the BMI of a person with zero "
----> 32                                   "height or weight (%s,%s)" % (self.getHeight(),self.getWeight()))
-     33 
-     34         return self.getWeight() / self.getHeight()**2
-
-NullPersonError: Cannot calculate the BMI of a person with zero height or weight (0,0)
-
-
- -
-
- -
-
-
-
In [19]:
-
-
-
p = Person(height=1.8, weight=77.5)
-
- -
-
-
- -
-
-
-
In [20]:
-
-
-
p.bmi()
-
- -
-
-
- -
-
- - -
-
Out[20]:
- - - -
-
23.919753086419753
-
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/10_error_handling.html b/html/answers/10_error_handling.html deleted file mode 100644 index b573a8b..0000000 --- a/html/answers/10_error_handling.html +++ /dev/null @@ -1,12432 +0,0 @@ - - - -10_error_handling - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Error Handling

Exceptions are useful for more than just signalling errors. They can also be used to help you handle the error, and potentially even fix the problem (a true self-healing program!).

-

Consider this cut down version of the .setHeight function from the last session...

- -
-
-
-
-
-
In [1]:
-
-
-
def setHeight(height):
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
-
-
-

The code currently correctly detects if the user supplies a height that is below 0 or above 2.5. However, what about when the user tries to set the height to something that is not a number?

- -
-
-
-
-
-
In [2]:
-
-
-
setHeight("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-TypeError                                 Traceback (most recent call last)
-<ipython-input-2-a84508710d2d> in <module>()
-----> 1 setHeight("cat")
-
-<ipython-input-1-35fed253919f> in setHeight(height)
-      1 def setHeight(height):
-----> 2     if height < 0 or height > 2.5:
-      3         raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-      4     print("setting the height to %s" % height)
-
-TypeError: unorderable types: str() < int()
-
-
- -
-
- -
-
-
-
-
-
-

We get a weird error message that says we have a TypeError, as you cannot order a string and an integer.

-

One way to address this is to ask that height is converted to a float, using height = float(height)

- -
-
-
-
-
-
In [3]:
-
-
-
def setHeight(height):
-    height = float(height)
-    
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
-
-
-

However, this hasn't made the error any easier to understand, as we now get a ValueError raised...

- -
-
-
-
-
-
In [4]:
-
-
-
setHeight("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-4-a84508710d2d> in <module>()
-----> 1 setHeight("cat")
-
-<ipython-input-3-9c5d65499179> in setHeight(height)
-      1 def setHeight(height):
-----> 2     height = float(height)
-      3 
-      4     if height < 0 or height > 2.5:
-      5         raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-
-ValueError: could not convert string to float: 'cat'
-
-
- -
-
- -
-
-
-
-
-
-

The solution is for us to handle the exception, using a try...except block

- -
-
-
-
-
-
In [5]:
-
-
-
def setHeight(height):
-    try:
-        height = float(height)
-    except:
-        raise TypeError("Invalid height: '%s'. You can only set the height to a numeric value" % height)
-    
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
In [6]:
-
-
-
setHeight("cat")
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-5-65dfe4215877> in setHeight(height)
-      2     try:
-----> 3         height = float(height)
-      4     except:
-
-ValueError: could not convert string to float: 'cat'
-
-During handling of the above exception, another exception occurred:
-
-TypeError                                 Traceback (most recent call last)
-<ipython-input-6-a84508710d2d> in <module>()
-----> 1 setHeight("cat")
-
-<ipython-input-5-65dfe4215877> in setHeight(height)
-      3         height = float(height)
-      4     except:
-----> 5         raise TypeError("Invalid height: '%s'. You can only set the height to a numeric value" % height)
-      6 
-      7     if height < 0 or height > 2.5:
-
-TypeError: Invalid height: 'cat'. You can only set the height to a numeric value
-
-
- -
-
- -
-
-
-
-
-
-

What's happened here? The try: line starts a try-block. The code that is in the try-block is run. If any of this code raises an exception, then execution stops in the try-block, and switches instead to the code in the except-block (everything within the except: block). In our case, float(height) raised an exception, so execution jumped to the except-block, in which we ran the raise TypeError(...) code.

-

Now the error is much more informative, allowing the user to better understand what has gone wrong. However, exception handling can do more than this. It can allow you to fix the problem. Consider this example...

- -
-
-
-
-
-
In [7]:
-
-
-
setHeight("1.8 m")
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-5-65dfe4215877> in setHeight(height)
-      2     try:
-----> 3         height = float(height)
-      4     except:
-
-ValueError: could not convert string to float: '1.8 m'
-
-During handling of the above exception, another exception occurred:
-
-TypeError                                 Traceback (most recent call last)
-<ipython-input-7-17945eae5425> in <module>()
-----> 1 setHeight("1.8 m")
-
-<ipython-input-5-65dfe4215877> in setHeight(height)
-      3         height = float(height)
-      4     except:
-----> 5         raise TypeError("Invalid height: '%s'. You can only set the height to a numeric value" % height)
-      6 
-      7     if height < 0 or height > 2.5:
-
-TypeError: Invalid height: '1.8 m'. You can only set the height to a numeric value
-
-
- -
-
- -
-
-
-
-
-
-

We as humans can see that this could be an acceptable input. However, the computer needs help to understand. We can add code to the except-block that can try to resolve the problem. For example, imagine we had a function that could interpret heights from strings...

- -
-
-
-
-
-
In [8]:
-
-
-
def string_to_height(height):
-    """This function tries to interpret the passed argument as a height 
-       in meters. The format should be 'X m', 'X meter' or 'X meters',
-       where 'X' is a number
-    """
-    # convert height to a string - this always works
-    height = str(height)
-        
-    words = height.split(" ")
-            
-    if len(words) == 2:
-        if words[1] == "m" or words[1] == "meter" or words[1] == "meters":
-            try:
-                return float(words[0])
-            except:
-                pass
-    
-    # Getting here means that we haven't been able to extract a valid height
-    raise TypeError("Cannot extract a valid height from '%s'" % height)
-
- -
-
-
- -
-
-
-
-
-
-

We can now call this function from within the except-block of setHeight

- -
-
-
-
-
-
In [9]:
-
-
-
def setHeight(height):
-    try:
-        height = float(height)
-    except:
-        height = string_to_height(height)
-    
-    if height < 0 or height > 2.5:
-        raise ValueError("Invalid height: %s. This should be between 0 and 2.5 m" % height)
-    print("setting the height to %s" % height)
-
- -
-
-
- -
-
-
-
In [10]:
-
-
-
setHeight("1.8 m")
-
- -
-
-
- -
-
- - -
-
- -
-
setting the height to 1.8
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Here is a copy of the Person class from the last session. Edit the setHeight function so that it uses exception handling and the string_to_height function to correctly interpret heights such as "1.8 m", and so that it gives a useful error message if it is given something weird. Check that the function correctly responds to a range of valid and invalid inputs.

- -
-
-
-
-
-
In [11]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self, height=0, weight=0):
-        """Construct a person with the specified name, height and weight"""
-        self.setHeight(height)
-        self.setWeight(weight)
-    
-    def setHeight(self, height):
-        """Set the person's height in meters"""
-        try:
-            height = float(height)
-        except:
-            height = string_to_height(height)
-            
-        if height < 0 or height > 2.5:
-            raise ValueError("Invalid height: %s. This shoud be between 0 and 2.5 meters" % height)
-        self._height = height
-    
-    def setWeight(self, weight):
-        """Set the person's weight in kilograms"""
-        if weight < 0 or weight > 500:
-            raise ValueError("Invalid weight: %s. This should be between 0 and 500 kilograms" % weight)
-        self._weight = weight
-        
-    def getHeight(self):
-        """Return the person's height in meters"""
-        return self._height
-    
-    def getWeight(self):
-        """Return the person's weight in kilograms"""
-        return self._weight
-    
-    def bmi(self):
-        """Return the person's body mass index (bmi)"""
-        if (self.getHeight() == 0 or self.getWeight() == 0):
-            raise NullPersonError("Cannot calculate the BMI of a person with zero "
-                                  "height or weight (%s,%s)" % (self.getHeight(),self.getWeight()))
-            
-        return self.getWeight() / self.getHeight()**2
-
- -
-
-
- -
-
-
-
In [12]:
-
-
-
p = Person(height="cat", weight=20)
-
- -
-
-
- -
-
- - -
-
- -
-
----------------------------------------------------------------------------
-ValueError                                Traceback (most recent call last)
-<ipython-input-11-575813bc64f8> in setHeight(self, height)
-     10         try:
----> 11             height = float(height)
-     12         except:
-
-ValueError: could not convert string to float: 'cat'
-
-During handling of the above exception, another exception occurred:
-
-TypeError                                 Traceback (most recent call last)
-<ipython-input-12-1d63e4e12dc7> in <module>()
-----> 1 p = Person(height="cat", weight=20)
-
-<ipython-input-11-575813bc64f8> in __init__(self, height, weight)
-      3     def __init__(self, height=0, weight=0):
-      4         """Construct a person with the specified name, height and weight"""
-----> 5         self.setHeight(height)
-      6         self.setWeight(weight)
-      7 
-
-<ipython-input-11-575813bc64f8> in setHeight(self, height)
-     11             height = float(height)
-     12         except:
----> 13             height = string_to_height(height)
-     14 
-     15         if height < 0 or height > 2.5:
-
-<ipython-input-8-0aa7ec4a5285> in string_to_height(height)
-     17 
-     18     # Getting here means that we haven't been able to extract a valid height
----> 19     raise TypeError("Cannot extract a valid height from '%s'" % height)
-
-TypeError: Cannot extract a valid height from 'cat'
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 2

Create a string_to_weight function that interprets weights in kilograms (e.g. "5 kg", "5 kilos" or "5 kilograms"). Now edit the Person.setWeight function so that it uses exception handling and string_to_weight to correctly interpret weights such as 35.5 kg and gives a useful error message if it is given something weird. Check that your function responds correctly to a range of valid and invalid inputs.

- -
-
-
-
-
-
In [13]:
-
-
-
def string_to_weight(weight):
-    """This function tries to interpret the passed argument as a weight 
-       in kilograms. The format should be 'X kg' 'X kilogram' or 'X kilograms',
-       where 'X' is a number
-    """
-    # convert weight to a string - this always works
-    weight = str(weight)
-        
-    words = weight.split(" ")
-            
-    if len(words) == 2:
-        if words[1] == "kg" or words[1] == "kilogram" or words[1] == "kilograms" \
-            or words[1] == "kilo" or words[1] == "kilos":
-            try:
-                return float(words[0])
-            except:
-                pass
-    
-    # Getting here means that we haven't been able to extract a valid weight
-    raise TypeError("Cannot extract a valid weight from '%s'" % weight)
-
- -
-
-
- -
-
-
-
In [14]:
-
-
-
class Person:
-    """Class that holds a person's height"""
-    def __init__(self, height=0, weight=0):
-        """Construct a person with the specified name, height and weight"""
-        self.setHeight(height)
-        self.setWeight(weight)
-    
-    def setHeight(self, height):
-        """Set the person's height in meters"""
-        try:
-            height = float(height)
-        except:
-            height = string_to_height(height)
-            
-        if height < 0 or height > 2.5:
-            raise ValueError("Invalid height: %s. This shoud be between 0 and 2.5 meters" % height)
-        self._height = height
-    
-    def setWeight(self, weight):
-        """Set the person's weight in kilograms"""
-        try:
-            weight = float(weight)
-        except:
-            weight = string_to_weight(weight)
-        if weight < 0 or weight > 500:
-            raise ValueError("Invalid weight: %s. This should be between 0 and 500 kilograms" % weight)
-        self._weight = weight
-        
-    def getHeight(self):
-        """Return the person's height in meters"""
-        return self._height
-    
-    def getWeight(self):
-        """Return the person's weight in kilograms"""
-        return self._weight
-    
-    def bmi(self):
-        """Return the person's body mass index (bmi)"""
-        if (self.getHeight() == 0 or self.getWeight() == 0):
-            raise NullPersonError("Cannot calculate the BMI of a person with zero "
-                                  "height or weight (%s,%s)" % (self.getHeight(),self.getWeight()))
-            
-        return self.getWeight() / self.getHeight()**2
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
p = Person(weight="55.6 kilos", height="1.5 meters")
-
- -
-
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/11_modules.html b/html/answers/11_modules.html deleted file mode 100644 index 8b84daa..0000000 --- a/html/answers/11_modules.html +++ /dev/null @@ -1,12675 +0,0 @@ - - - -11_modules - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Modules

You can turn any Python script that you write into a module that other people can import and use in their own code.

-

For example;

- -
-
-
-
-
-
In [1]:
-
-
-
import superhero
-
- -
-
-
- -
-
- - -
-
- -
-
Is it a bird? Is it a plane? No, it's Superman!!!
-Superman will battle Lex Luther. The winner is Superman
-Lex steals some krytonite...
-They battle again... The winner is Lex Luther
-
-
-
- -
-
- -
-
-
-
-
-
-

What has happened here???

-

There is a file in your current directory called superhero.py. The line import superhero will look in the current directory, to find a file called superhero.py. It then runs this file, just as if you had typed it into the screen.

- -
-
-
-
-
-
-
-
-

This is just a simple Python script, which we can print out using

- -
-
-
-
-
-
In [2]:
-
-
-
! cat superhero.py
-
- -
-
-
- -
-
- - -
-
- -
-
"""
-This module provides a set of classes for creating superheros
-and supervillains. Have fun!
-
-Author - Christopher Woods
-License - BSD
-"""
-
-class Superhero:
-    """This class allows you to create your own Superhero"""
-    def __init__(self, name, weakness):
-        """Construct a superhero with the specified name and the 
-           specified weakness
-        """
-        self.setName(name)
-        self.setWeakness(weakness)
-
-    def setName(self, name):
-        """Set the name of the superhero"""
-        self._name = name
-
-    def setWeakness(self, weakness):
-        """Set the weakness of the superhero"""
-        self._weakness = weakness
-
-    def getName(self):
-        """Return the name of the superhero"""
-        return self._name
-
-    def getWeakness(self):
-        """Return the weakness of the superhero"""
-        return self._weakness
-
-    def isVulnerableTo(self, item):
-        """Return whether or not this superhero is 
-           vulnerable to 'item'"""
-        return self.getWeakness().lower() == item.lower()
-
-class Supervillain:
-    """This class allows you to create your own supervillain"""
-    def __init__(self, name):
-        self.setName(name)
-        self._loot = []
-
-    def setName(self, name):
-        """Set the name of the villain"""
-        self._name = name
-
-    def getName(self):
-        """Return the name of the villain"""
-        return self._name
-
-    def steal(self, item):
-        """Tell the villain to steal 'item'"""
-        self._loot.append(item)
-
-    def getLoot(self):
-        """Return all of the loot that this villain has stolen"""
-        return self._loot
-
-def battle(superhero, supervillain):
-    """This function will pitch the superhero and villain
-       into battle, and will return the name of whoever wins!
-    """
-
-    try:
-        for possession in supervillain.getLoot():
-            if superhero.isVulnerableTo(possession):
-                return supervillain.getName()
-        return superhero.getName()
-    except Exception as e:
-        # Draw, so no-one won!
-        return "No-one, because %s" % e
-
-superman = Superhero(name="Superman", weakness="kryptonite")
-
-print("Is it a bird? Is it a plane? No, it's %s!!!" % superman.getName())
-
-lex = Supervillain(name="Lex Luther")
-
-print("%s will battle %s. The winner is %s" \
-  % (superman.getName(), lex.getName(), \
-     battle(superman, lex) ) )
-
-print("Lex steals some krytonite...")
-lex.steal("kryptonite")
-
-print("They battle again... The winner is %s" \
-   % battle(superman, lex))
-
-
-
-
-
- -
-
- -
-
-
-
-
-
-

We can get help on the module using help

- -
-
-
-
-
-
In [3]:
-
-
-
help(superhero)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on module superhero:
-
-NAME
-    superhero
-
-DESCRIPTION
-    This module provides a set of classes for creating superheros
-    and supervillains. Have fun!
-    
-    Author - Christopher Woods
-    License - BSD
-
-CLASSES
-    builtins.object
-        Superhero
-        Supervillain
-    
-    class Superhero(builtins.object)
-     |  This class allows you to create your own Superhero
-     |  
-     |  Methods defined here:
-     |  
-     |  __init__(self, name, weakness)
-     |      Construct a superhero with the specified name and the 
-     |      specified weakness
-     |  
-     |  getName(self)
-     |      Return the name of the superhero
-     |  
-     |  getWeakness(self)
-     |      Return the weakness of the superhero
-     |  
-     |  isVulnerableTo(self, item)
-     |      Return whether or not this superhero is 
-     |      vulnerable to 'item'
-     |  
-     |  setName(self, name)
-     |      Set the name of the superhero
-     |  
-     |  setWeakness(self, weakness)
-     |      Set the weakness of the superhero
-     |  
-     |  ----------------------------------------------------------------------
-     |  Data descriptors defined here:
-     |  
-     |  __dict__
-     |      dictionary for instance variables (if defined)
-     |  
-     |  __weakref__
-     |      list of weak references to the object (if defined)
-    
-    class Supervillain(builtins.object)
-     |  This class allows you to create your own supervillain
-     |  
-     |  Methods defined here:
-     |  
-     |  __init__(self, name)
-     |      Initialize self.  See help(type(self)) for accurate signature.
-     |  
-     |  getLoot(self)
-     |      Return all of the loot that this villain has stolen
-     |  
-     |  getName(self)
-     |      Return the name of the villain
-     |  
-     |  setName(self, name)
-     |      Set the name of the villain
-     |  
-     |  steal(self, item)
-     |      Tell the villain to steal 'item'
-     |  
-     |  ----------------------------------------------------------------------
-     |  Data descriptors defined here:
-     |  
-     |  __dict__
-     |      dictionary for instance variables (if defined)
-     |  
-     |  __weakref__
-     |      list of weak references to the object (if defined)
-
-FUNCTIONS
-    battle(superhero, supervillain)
-        This function will pitch the superhero and villain
-        into battle, and will return the name of whoever wins!
-
-DATA
-    lex = <superhero.Supervillain object>
-    superman = <superhero.Superhero object>
-
-FILE
-    /home/workshops/python_and_data/answers/superhero.py
-
-
-
-
-
- -
-
- -
-
-
-
-
-
-

This documentation comes from the class and function documentation put into the file.

-

You can also use the data, classes and functions in the file, e.g.

- -
-
-
-
-
-
In [4]:
-
-
-
ironman = superhero.Superhero(name="Iron Man", weakness="rust")
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
superhero.battle(ironman, superhero.lex)
-
- -
-
-
- -
-
- - -
-
Out[5]:
- - - -
-
'Iron Man'
-
- -
- -
-
- -
-
-
-
In [6]:
-
-
-
superhero.lex.steal("rust")
-
- -
-
-
- -
-
-
-
In [7]:
-
-
-
superhero.battle(ironman, superhero.lex)
-
- -
-
-
- -
-
- - -
-
Out[7]:
- - - -
-
'Lex Luther'
-
- -
- -
-
- -
-
-
-
-
-
-

One thing to note is that all of the classes, functions and data in the script have been imported into their own namespace, named after the script (e.g. superhero.). We can import the file and put all names into the current namespace using

- -
-
-
-
-
-
In [8]:
-
-
-
from superhero import *
-
- -
-
-
- -
-
-
-
In [9]:
-
-
-
battle(ironman, lex)
-
- -
-
-
- -
-
- - -
-
Out[9]:
- - - -
-
'Lex Luther'
-
- -
- -
-
- -
-
-
-
-
-
-

While any python script can be imported as a module, there are a few conventions you should follow that will make your module easier for others to use.

-
    -
  • Add documentation to the module. As you can see, there is a docstring at the top of superhero.py, which is the first thing written out by help(). This should provide an overview of the module.
  • -
  • Avoid actually running any code or creating any variables. The current superhero.py is bad as it does this, which is why you see "Is it a bird..." printed when you import it!
  • -
-

The way to avoid creating any variables or running code is to let the script detect when it is being imported, and to not create any variables if that is the case.

-

You can detect if your Python script is not being imported using

-
if __name__ == "__main__":
-    print("I am not being imported.")
-
- -
-
-
-
-
-
In [10]:
-
-
-
if __name__ == "__main__":
-    print("I am not being imported")
-
- -
-
-
- -
-
- - -
-
- -
-
I am not being imported
-
-
-
- -
-
- -
-
-
-
-
-
-

To show how this works, there is a superhero2.py script, which is identical to superhero.py, except all code that should not be run on import is hidden inside the if __name__ == "__main__": block.

- -
-
-
-
-
-
In [11]:
-
-
-
! cat superhero2.py
-
- -
-
-
- -
-
- - -
-
- -
-
"""
-This module provides a set of classes for creating superheros
-and supervillains. Have fun!
-
-Author - Christopher Woods
-License - BSD
-"""
-
-class Superhero:
-    """This class allows you to create your own Superhero"""
-    def __init__(self, name, weakness):
-        """Construct a superhero with the specified name and the 
-           specified weakness
-        """
-        self.setName(name)
-        self.setWeakness(weakness)
-
-    def setName(self, name):
-        """Set the name of the superhero"""
-        self._name = name
-
-    def setWeakness(self, weakness):
-        """Set the weakness of the superhero"""
-        self._weakness = weakness
-
-    def getName(self):
-        """Return the name of the superhero"""
-        return self._name
-
-    def getWeakness(self):
-        """Return the weakness of the superhero"""
-        return self._weakness
-
-    def isVulnerableTo(self, item):
-        """Return whether or not this superhero is 
-           vulnerable to 'item'"""
-        return self.getWeakness().lower() == item.lower()
-
-class Supervillain:
-    """This class allows you to create your own supervillain"""
-    def __init__(self, name):
-        self.setName(name)
-        self._loot = []
-
-    def setName(self, name):
-        """Set the name of the villain"""
-        self._name = name
-
-    def getName(self):
-        """Return the name of the villain"""
-        return self._name
-
-    def steal(self, item):
-        """Tell the villain to steal 'item'"""
-        self._loot.append(item)
-
-    def getLoot(self):
-        """Return all of the loot that this villain has stolen"""
-        return self._loot
-
-def battle(superhero, supervillain):
-    """This function will pitch the superhero and villain
-       into battle, and will return the name of whoever wins!
-    """
-
-    try:
-        for possession in supervillain.getLoot():
-            if superhero.isVulnerableTo(possession):
-                return supervillain.getName()
-        return superhero.getName()
-    except Exception as e:
-        # Draw, so no-one won!
-        return "No-one, because %s" % e
-
-if __name__ == "__main__":
-    superman = Superhero(name="Superman", weakness="kryptonite")
-
-    print("Is it a bird? Is it a plane? No, it's %s!!!" % superman.getName())
-
-    lex = Supervillain(name="Lex Luther")
-
-    print("%s will battle %s. The winner is %s" \
-       % (superman.getName(), lex.getName(), \
-          battle(superman, lex) ) )
-
-    print("Lex steals some krytonite...")
-    lex.steal("kryptonite")
-
-    print("They battle again... The winner is %s" \
-        % battle(superman, lex))
-
-
-
-
- -
-
- -
-
-
-
In [12]:
-
-
-
import superhero2
-
- -
-
-
- -
-
-
-
-
-
-

By using if __name__ == "__main__": we have prevented superhero2.py from printing anything out when it is imported, and have also prevented it from creating the variables lex and superman.

-

You can see this by running the superhero2.py script directly, e.g. using

- -
! python superhero2.py
- -
-
-
-
-
-
In [13]:
-
-
-
! python superhero2.py
-
- -
-
-
- -
-
- - -
-
- -
-
Is it a bird? Is it a plane? No, it's Superman!!!
-Superman will battle Lex Luther. The winner is Superman
-Lex steals some krytonite...
-They battle again... The winner is Lex Luther
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Use the "New Text File" option in the Jupyter Home to create a new python text file called morse.py. Copy the below class into this file.

-
class Morse:
-    def __init__(self):
-        self._letter_to_morse = {'a':'.-', 'b':'-...', 'c':'-.-.', 'd':'-..', 'e':'.', 'f':'..-.',
-                   'g':'--.', 'h':'....', 'i':'..', 'j':'.---', 'k':'-.-', 'l':'.-..', 'm':'--',
-                   'n':'-.', 'o':'---', 'p':'.--.', 'q':'--.-', 'r':'.-.', 's':'...', 't':'-',
-                   'u':'..-', 'v':'...-', 'w':'.--', 'x':'-..-', 'y':'-.--', 'z':'--..',
-                   '0':'-----', '1':'.----', '2':'..---', '3':'...--', '4':'....-',
-                   '5':'.....', '6':'-....', '7':'--...', '8':'---..', '9':'----.',
-                   ' ':'/' }
-
-        self._morse_to_letter = {}
-        for letter in self._letter_to_morse.keys():
-            self._morse_to_letter[ self._letter_to_morse[letter] ] = letter
-
-    def encode(self, message):
-        morse = []
-        for letter in message:
-            morse.append( self._letter_to_morse[letter.lower()] )
-        return morse
-
-    def decode(self, morse):
-        message = []
-        for code in morse:
-            message.append( self._morse_to_letter[code] )
-        return "".join(message)
-
-

Add documentation to this class, and to the module. Next, import the module and get help using the commands

-
import morse
-help(morse)
-
-

Does your documentation make sense?

- -
-
-
-
-
-
In [14]:
-
-
-
import morse
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
help(morse)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on module morse:
-
-NAME
-    morse
-
-CLASSES
-    builtins.object
-        Morse
-    
-    class Morse(builtins.object)
-     |  Class that can encode and decode messages to/from Morse code
-     |  
-     |  Methods defined here:
-     |  
-     |  __init__(self)
-     |      Initialize self.  See help(type(self)) for accurate signature.
-     |  
-     |  decode(self, morse)
-     |      Decodes a passed list of Morse code letters and returns the decoded string
-     |  
-     |  encode(self, message)
-     |      Encode the passed message into Morse code. Returns a list of Morse 
-     |      code letters
-     |  
-     |  ----------------------------------------------------------------------
-     |  Data descriptors defined here:
-     |  
-     |  __dict__
-     |      dictionary for instance variables (if defined)
-     |  
-     |  __weakref__
-     |      list of weak references to the object (if defined)
-
-FILE
-    /home/workshops/python_and_data/answers/morse.py
-
-
-
-
-
- -
-
- -
-
-
-
-
-
-

Exercise 2

Create some checks of your module that should not be run when the module is imported (i.e. only run directly). The checks should be, e.g.

-
morse = Morse()
-
-    for message in ["Hello world", "something to encode", "test message"]:
-        test = morse.decode( morse.encode(message) )
-
-        if message.lower() == test: 
-            print("Success: %s" % message)
-        else:
-            print("Failed: %s" % message)
-
-

Validate that the check doesn't run on import using

-
import morse
-
-

Validate that the check runs from the command line using

- -
! python morse.py
- -
-
-
-
-
-
In [16]:
-
-
-
! python morse.py
-
- -
-
-
- -
-
- - -
-
- -
-
Success: Hello world
-Success: something to encode
-Success: test message
-
-
-
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/12_pandas.html b/html/answers/12_pandas.html deleted file mode 100644 index f9ed8fd..0000000 --- a/html/answers/12_pandas.html +++ /dev/null @@ -1,14973 +0,0 @@ - - - -12_pandas - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Pandas

Pandas is a library providing high-performance, easy-to-use data structures and data analysis tools. The core of pandas is its dataframe which is essentially a table of data. Pandas provides easy and powerful ways to import data from a variety of sources and export it to just as many. It is also explicitly designed to handle missing data elegantly which is a very common problem in data from the real world.

-

The official pandas documentation is very comprehensive and you will be able to answer a lot of questions in there; however, it can sometimes be hard to find the right page. Don't be afraid to use Google to find help.

- -
-
-
-
-
-
-
-
-

Pandas has a standard convention for importing it which you will see used in a lot of documentation so we will follow that in this course:

- -
-
-
-
-
-
In [1]:
-
-
-
import pandas as pd
-from pandas import Series, DataFrame
-
- -
-
-
- -
-
-
-
-
-
-

Series

The simplest of pandas' data structures is the Series. It is a one-dimensional list-like structure. -Let's create one from a list:

- -
-
-
-
-
-
In [2]:
-
-
-
Series([14, 7, 3, -7, 8])
-
- -
-
-
- -
-
- - -
-
Out[2]:
- - - -
-
0    14
-1     7
-2     3
-3    -7
-4     8
-dtype: int64
-
- -
- -
-
- -
-
-
-
-
-
-

There are three main components to this output. -The first column (0, 1, etc.) is the index; by default this numbers each row starting from zero. -The second column is our data, stored in the same order we entered it in our list. -Finally at the bottom there is the dtype which stands for 'data type' which is telling us that all our data is being stored as a 64-bit integer. -Usually you can ignore the dtype until you start doing more advanced things.

-

In the first example above we allowed pandas to automatically create an index for our Series (this is the 0, 1, 2, etc. in the left column) but often you will want to specify one yourself

- -
-
-
-
-
-
In [3]:
-
-
-
s = Series([14, 7, 3, -7, 8], index=['a', 'b', 'c', 'd', 'e'])
-print(s)
-
- -
-
-
- -
-
- - -
-
- -
-
a    14
-b     7
-c     3
-d    -7
-e     8
-dtype: int64
-
-
-
- -
-
- -
-
-
-
-
-
-

We can use this index to retrieve individual rows

- -
-
-
-
-
-
In [4]:
-
-
-
s['a']
-
- -
-
-
- -
-
- - -
-
Out[4]:
- - - -
-
14
-
- -
- -
-
- -
-
-
-
-
-
-

to replace values in the series

- -
-
-
-
-
-
In [5]:
-
-
-
s['c'] = -1
-
- -
-
-
- -
-
-
-
-
-
-

or to get a set of rows

- -
-
-
-
-
-
In [6]:
-
-
-
s[['a', 'c', 'd']]
-
- -
-
-
- -
-
- - -
-
Out[6]:
- - - -
-
a    14
-c    -1
-d    -7
-dtype: int64
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 1

    -
  • Create a Pandas Series with 10 or so elements where the indices are years and the values are numbers.
  • -
  • Experiment with retrieving elements from the Series.
  • -
  • Try making another Series with duplicate values in the index, what happens when you access those elements?
  • -
  • How does a Pandas Series differ from a Python list or dict?
  • -
- -
-
-
-
-
-
In [7]:
-
-
-
# Answer
-
-ex1_1 = Series([4, 7.1, 7.3, 7.8, 8.1], index=[2000, 2001, 2002, 2003, 2004])
-
-ex1_1
-
- -
-
-
- -
-
- - -
-
Out[7]:
- - - -
-
2000    4.0
-2001    7.1
-2002    7.3
-2003    7.8
-2004    8.1
-dtype: float64
-
- -
- -
-
- -
-
-
-
In [8]:
-
-
-
# Answer
-
-ex1_1[2002]
-
- -
-
-
- -
-
- - -
-
Out[8]:
- - - -
-
7.2999999999999998
-
- -
- -
-
- -
-
-
-
In [9]:
-
-
-
# Answer
-
-ex1_1[[2001, 2004]]
-
- -
-
-
- -
-
- - -
-
Out[9]:
- - - -
-
2001    7.1
-2004    8.1
-dtype: float64
-
- -
- -
-
- -
-
-
-
In [10]:
-
-
-
# Answer
-
-ex1_2 = Series([4, 7.1, 7.3, 7.8, 8.1], index=[2000, 2001, 2004, 2001, 2004])
-
-ex1_2
-
- -
-
-
- -
-
- - -
-
Out[10]:
- - - -
-
2000    4.0
-2001    7.1
-2004    7.3
-2001    7.8
-2004    8.1
-dtype: float64
-
- -
- -
-
- -
-
-
-
In [11]:
-
-
-
# Answer
-
-ex1_2[2001]
-
- -
-
-
- -
-
- - -
-
Out[11]:
- - - -
-
2001    7.1
-2001    7.8
-dtype: float64
-
- -
- -
-
- -
-
-
-
-
-
-

Series operations

A Series is list-like in the sense that it is an ordered set of values. It is also dict-like since its entries can be accessed via key lookup. One very important way in which it differs is how it allows operations to be done over the whole Series in one go, a technique often referred to as 'broadcasting'.

-

A simple example is wanting to double the value of every entry in a set of data. In standard Python, you might have a list like

- -
-
-
-
-
-
In [12]:
-
-
-
my_list = [3, 6, 8, 4, 10]
-
- -
-
-
- -
-
-
-
-
-
-

If you wanted to double every entry you might try simply multiplying the list by 2:

- -
-
-
-
-
-
In [13]:
-
-
-
my_list * 2
-
- -
-
-
- -
-
- - -
-
Out[13]:
- - - -
-
[3, 6, 8, 4, 10, 3, 6, 8, 4, 10]
-
- -
- -
-
- -
-
-
-
-
-
-

but as you can see, that simply duplicated the elements. Instead you would have to use a for loop or a list comprehension:

- -
-
-
-
-
-
In [14]:
-
-
-
[i * 2 for i in my_list]
-
- -
-
-
- -
-
- - -
-
Out[14]:
- - - -
-
[6, 12, 16, 8, 20]
-
- -
- -
-
- -
-
-
-
-
-
-

With a pandas Series, however, you can perform bulk mathematical operations to the whole series in one go:

- -
-
-
-
-
-
In [15]:
-
-
-
my_series = Series(my_list)
-print(my_series)
-
- -
-
-
- -
-
- - -
-
- -
-
0     3
-1     6
-2     8
-3     4
-4    10
-dtype: int64
-
-
-
- -
-
- -
-
-
-
In [16]:
-
-
-
my_series * 2
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
0     6
-1    12
-2    16
-3     8
-4    20
-dtype: int64
-
- -
- -
-
- -
-
-
-
-
-
-

As well as bulk modifications, you can perform bulk selections by putting more complex statements in the square brackets:

- -
-
-
-
-
-
In [17]:
-
-
-
s[s < 0]  # All negative entries
-
- -
-
-
- -
-
- - -
-
Out[17]:
- - - -
-
c   -1
-d   -7
-dtype: int64
-
- -
- -
-
- -
-
-
-
In [18]:
-
-
-
s[(s * 2) > 4]  # All entries which, when doubled are greater than 4
-
- -
-
-
- -
-
- - -
-
Out[18]:
- - - -
-
a    14
-b     7
-e     8
-dtype: int64
-
- -
- -
-
- -
-
-
-
-
-
-

These operations work because the Series index selection can be passed a series of True and False values which it then uses to filter the result:

- -
-
-
-
-
-
In [19]:
-
-
-
(s * 2) > 4
-
- -
-
-
- -
-
- - -
-
Out[19]:
- - - -
-
a     True
-b     True
-c    False
-d    False
-e     True
-dtype: bool
-
- -
- -
-
- -
-
-
-
-
-
-

Here you can see that the rows a, b and e are True while the others are False. Passing this to s[...] will only show rows that are True.

- -
-
-
-
-
-
-
-
-

Multi-Series operations

It is also possible to perform operations between two Series objects:

- -
-
-
-
-
-
In [20]:
-
-
-
s2 = Series([23,5,34,7,5])
-s3 = Series([7, 6, 5,4,3])
-s2 - s3
-
- -
-
-
- -
-
- - -
-
Out[20]:
- - - -
-
0    16
-1    -1
-2    29
-3     3
-4     2
-dtype: int64
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

    -
  • Create two Series objects of equal length with no specified index and containing any values you like. Perform some mathematical operations on them and experiment to make sure it works how you think.
  • -
  • What happens when you perform an operation on two series which have different lengths? How does this change when you give the series some indices?
  • -
  • Using the Series from the first exercise with the years for the index, select all entries with even-numbered years. Also, select all those with odd-numbered years.
  • -
- -
-
-
-
-
-
In [21]:
-
-
-
# Answer
-
-ex2_1a = Series([1,3,5,7,4,3])
-ex2_1b = Series([3,7,9,4,2,4])
-
-ex2_1a + ex2_1b
-
- -
-
-
- -
-
- - -
-
Out[21]:
- - - -
-
0     4
-1    10
-2    14
-3    11
-4     6
-5     7
-dtype: int64
-
- -
- -
-
- -
-
-
-
In [22]:
-
-
-
# Answer
-
-ex2_2a = Series([1,3,5,7,4,3])
-ex2_2b = Series([3,7,9,4])
-
-ex2_2a + ex2_2b
-
- -
-
-
- -
-
- - -
-
Out[22]:
- - - -
-
0     4.0
-1    10.0
-2    14.0
-3    11.0
-4     NaN
-5     NaN
-dtype: float64
-
- -
- -
-
- -
-
-
-
In [23]:
-
-
-
# Answer
-
-ex2_2a = Series([1,3,5,7,4,3], index=[1,2,3,4,5,6])
-ex2_2b = Series([3,7,9,4], index=[1,3,5,7])
-
-ex2_2a + ex2_2b
-
- -
-
-
- -
-
- - -
-
Out[23]:
- - - -
-
1     4.0
-2     NaN
-3    12.0
-4     NaN
-5    13.0
-6     NaN
-7     NaN
-dtype: float64
-
- -
- -
-
- -
-
-
-
In [24]:
-
-
-
ex1_1[ex1_1.index % 2 == 0]
-
- -
-
-
- -
-
- - -
-
Out[24]:
- - - -
-
2000    4.0
-2002    7.3
-2004    8.1
-dtype: float64
-
- -
- -
-
- -
-
-
-
-
-
-

DataFrame

While you can think of the Series as a one-dimensional list of data, pandas' DataFrame is a two (or possibly more) dimensional table of data. You can think of each column in the table as being a Series.

- -
-
-
-
-
-
In [25]:
-
-
-
data = {'city': ['Paris', 'Paris', 'Paris', 'Paris',
-                 'London', 'London', 'London', 'London',
-                 'Rome', 'Rome', 'Rome', 'Rome'],
-        'year': [2001, 2008, 2009, 2010,
-                 2001, 2006, 2011, 2015,
-                 2001, 2006, 2009, 2012],
-        'pop': [2.148, 2.211, 2.234, 2.244,
-                7.322, 7.657, 8.174, 8.615,
-                2.547, 2.627, 2.734, 2.627]}
-df = DataFrame(data)
-
- -
-
-
- -
-
-
-
-
-
-

This has created a DataFrame from the dictionary data. The keys will become the column headers and the values will be the values in each column. As with the Series, an index will be created automatically.

- -
-
-
-
-
-
In [26]:
-
-
-
df
-
- -
-
-
- -
-
- - -
-
Out[26]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
citypopyear
0Paris2.1482001
1Paris2.2112008
2Paris2.2342009
3Paris2.2442010
4London7.3222001
5London7.6572006
6London8.1742011
7London8.6152015
8Rome2.5472001
9Rome2.6272006
10Rome2.7342009
11Rome2.6272012
-
-
- -
- -
-
- -
-
-
-
-
-
-

Or, if you just want a peek at the data, you can just grab the first few rows with:

- -
-
-
-
-
-
In [27]:
-
-
-
df.head(3)
-
- -
-
-
- -
-
- - -
-
Out[27]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
citypopyear
0Paris2.1482001
1Paris2.2112008
2Paris2.2342009
-
-
- -
- -
-
- -
-
-
-
-
-
-

Since we passed in a dictionary to the DataFrame constructor, the order of the columns will not necessarily match the order in which you defined them. To enforce a certain order, you can pass a columns argument to the constructor giving a list of the columns in the order you want them:

- -
-
-
-
-
-
In [28]:
-
-
-
DataFrame(data, columns=['year', 'city', 'pop'])
-
- -
-
-
- -
-
- - -
-
Out[28]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
yearcitypop
02001Paris2.148
12008Paris2.211
22009Paris2.234
32010Paris2.244
42001London7.322
52006London7.657
62011London8.174
72015London8.615
82001Rome2.547
92006Rome2.627
102009Rome2.734
112012Rome2.627
-
-
- -
- -
-
- -
-
-
-
-
-
-

When we accessed elements from a Series object, it would select an element by row. However, by default DataFrames index primarily by column. You can access any column directly by using square brackets or by named attributes:

- -
-
-
-
-
-
In [29]:
-
-
-
df['year']
-
- -
-
-
- -
-
- - -
-
Out[29]:
- - - -
-
0     2001
-1     2008
-2     2009
-3     2010
-4     2001
-5     2006
-6     2011
-7     2015
-8     2001
-9     2006
-10    2009
-11    2012
-Name: year, dtype: int64
-
- -
- -
-
- -
-
-
-
In [30]:
-
-
-
df.city
-
- -
-
-
- -
-
- - -
-
Out[30]:
- - - -
-
0      Paris
-1      Paris
-2      Paris
-3      Paris
-4     London
-5     London
-6     London
-7     London
-8       Rome
-9       Rome
-10      Rome
-11      Rome
-Name: city, dtype: object
-
- -
- -
-
- -
-
-
-
-
-
-

Accessing a column like this returns a Series which will act in the same way as those we were using earlier.

-

Note that there is one additional part to this output, Name: city. Pandas has remembered that this Series was created from the 'city' column in the DataFrame.

- -
-
-
-
-
-
In [31]:
-
-
-
type(df.city)
-
- -
-
-
- -
-
- - -
-
Out[31]:
- - - -
-
pandas.core.series.Series
-
- -
- -
-
- -
-
-
-
In [32]:
-
-
-
df.city == 'Paris'
-
- -
-
-
- -
-
- - -
-
Out[32]:
- - - -
-
0      True
-1      True
-2      True
-3      True
-4     False
-5     False
-6     False
-7     False
-8     False
-9     False
-10    False
-11    False
-Name: city, dtype: bool
-
- -
- -
-
- -
-
-
-
-
-
-

This has created a new Series which has True set where the city is Paris and False elsewhere.

-

We can use filtered Series like this to filter the DataFrame as a whole. df.city == 'Paris' has returned a Series containing booleans. Passing it back into df as an indexing operation will use it to filter based on the 'city' column.

- -
-
-
-
-
-
In [33]:
-
-
-
df[df.city == 'Paris']
-
- -
-
-
- -
-
- - -
-
Out[33]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
citypopyear
0Paris2.1482001
1Paris2.2112008
2Paris2.2342009
3Paris2.2442010
-
-
- -
- -
-
- -
-
-
-
-
-
-

You can then carry on and grab another column after that filter:

- -
-
-
-
-
-
In [34]:
-
-
-
df[df.city == 'Paris'].year
-
- -
-
-
- -
-
- - -
-
Out[34]:
- - - -
-
0    2001
-1    2008
-2    2009
-3    2010
-Name: year, dtype: int64
-
- -
- -
-
- -
-
-
-
-
-
-

If you want to select a row from a DataFrame then you can use the .loc attribute which allows you to pass index values like:

- -
-
-
-
-
-
In [35]:
-
-
-
df.loc[2]
-
- -
-
-
- -
-
- - -
-
Out[35]:
- - - -
-
city    Paris
-pop     2.234
-year     2009
-Name: 2, dtype: object
-
- -
- -
-
- -
-
-
-
In [36]:
-
-
-
df.loc[2]['city']
-
- -
-
-
- -
-
- - -
-
Out[36]:
- - - -
-
'Paris'
-
- -
- -
-
- -
-
-
-
-
-
-

Adding new columns

New columns can be added to a DataFrame simply by assigning them by index (as you would for a Python dict) and can be deleted with the del keyword in the same way:

- -
-
-
-
-
-
In [37]:
-
-
-
df['continental'] = df.city != 'London'
-df
-
- -
-
-
- -
-
- - -
-
Out[37]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
citypopyearcontinental
0Paris2.1482001True
1Paris2.2112008True
2Paris2.2342009True
3Paris2.2442010True
4London7.3222001False
5London7.6572006False
6London8.1742011False
7London8.6152015False
8Rome2.5472001True
9Rome2.6272006True
10Rome2.7342009True
11Rome2.6272012True
-
-
- -
- -
-
- -
-
-
-
In [38]:
-
-
-
del df['continental']
-
- -
-
-
- -
-
-
-
-
-
-

Exercise 3

    -
  • Create the DataFrame containing the census data for the three cities.
  • -
  • Select the data for the year 2001. Which city had the smallest population that year?
  • -
  • Find all the cities which had a population smaller than 2.6 million.
  • -
- -
-
-
-
-
-
In [39]:
-
-
-
# Answer
-
-print(df[df.year == 2001])
-
- -
-
-
- -
-
- - -
-
- -
-
     city    pop  year
-0   Paris  2.148  2001
-4  London  7.322  2001
-8    Rome  2.547  2001
-
-
-
- -
-
- -
-
-
-
In [40]:
-
-
-
# Answer
-
-print(df[df['pop'] < 2.6])
-
- -
-
-
- -
-
- - -
-
- -
-
    city    pop  year
-0  Paris  2.148  2001
-1  Paris  2.211  2008
-2  Paris  2.234  2009
-3  Paris  2.244  2010
-8   Rome  2.547  2001
-
-
-
- -
-
- -
-
-
-
-
-
-

Reading from file

One of the most common situations is that you have some data file containing the data you want to read. Perhaps this is data you've produced yourself or maybe it's from a colleague. In an ideal world the file will be perfectly formatted and will be trivial to import into pandas but since this is so often not the case, it provides a number of features to make your life easier.

-

Full information on reading and writing is available in the pandas manual on IO tools but first it's worth noting the common formats that pandas can work with:

-
    -
  • Comma separated tables (or tab-separated or space-separated etc.)
  • -
  • Excel spreadsheets
  • -
  • HDF5 files
  • -
  • SQL databases
  • -
-

For this course we will focus on plain-text CSV files as they are perhaps the most common format. Imagine we have a CSV file like (you can download this file from city_pop.csv):

- -
-
-
-
-
-
In [41]:
-
-
-
! cat data/city_pop.csv  # Uses the IPython 'magic' !cat to print the file
-
- -
-
-
- -
-
- - -
-
- -
-
This is an example CSV file
-The text at the top here is not part of the data but instead is here
-to describe the file. You'll see this quite often in real-world data.
-A -1 signifies a missing value.
-
-year;London;Paris;Rome
-2001;7.322;2.148;2.547
-2006;7.652;;2.627
-2008;-1;2.211;
-2009;-1;2.234;2.734
-2011;8.174;;
-2012;-1;2.244;2.627
-2015;8.615;;
-
-
-
- -
-
- -
-
-
-
-
-
-

We can use the pandas function read_csv() to read the file and convert it to a DataFrame. Full documentation for this function can be found in the manual or, as with any Python object, directly in the notebook by calling help() on it or by putting a ? after the name:

- -
-
-
-
-
-
In [42]:
-
-
-
help(pd.read_csv)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on function read_csv in module pandas.io.parsers:
-
-read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, skip_footer=0, doublequote=True, delim_whitespace=False, as_recarray=None, compact_ints=None, use_unsigned=None, low_memory=True, buffer_lines=None, memory_map=False, float_precision=None)
-    Read CSV (comma-separated) file into DataFrame
-    
-    Also supports optionally iterating or breaking of the file
-    into chunks.
-    
-    Additional help can be found in the `online docs for IO Tools
-    <http://pandas.pydata.org/pandas-docs/stable/io.html>`_.
-    
-    Parameters
-    ----------
-    filepath_or_buffer : str, pathlib.Path, py._path.local.LocalPath or any object with a read() method (such as a file handle or StringIO)
-        The string could be a URL. Valid URL schemes include http, ftp, s3, and
-        file. For file URLs, a host is expected. For instance, a local file could
-        be file ://localhost/path/to/table.csv
-    sep : str, default ','
-        Delimiter to use. If sep is None, the C engine cannot automatically detect
-        the separator, but the Python parsing engine can, meaning the latter will
-        be used and automatically detect the separator by Python's builtin sniffer
-        tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
-        different from ``'\s+'`` will be interpreted as regular expressions and
-        will also force the use of the Python parsing engine. Note that regex
-        delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``
-    delimiter : str, default ``None``
-        Alternative argument name for sep.
-    delim_whitespace : boolean, default False
-        Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be
-        used as the sep. Equivalent to setting ``sep='\s+'``. If this option
-        is set to True, nothing should be passed in for the ``delimiter``
-        parameter.
-    
-        .. versionadded:: 0.18.1 support for the Python parser.
-    
-    header : int or list of ints, default 'infer'
-        Row number(s) to use as the column names, and the start of the
-        data.  Default behavior is to infer the column names: if no names
-        are passed the behavior is identical to ``header=0`` and column
-        names are inferred from the first line of the file, if column
-        names are passed explicitly then the behavior is identical to
-        ``header=None``. Explicitly pass ``header=0`` to be able to
-        replace existing names. The header can be a list of integers that
-        specify row locations for a multi-index on the columns
-        e.g. [0,1,3]. Intervening rows that are not specified will be
-        skipped (e.g. 2 in this example is skipped). Note that this
-        parameter ignores commented lines and empty lines if
-        ``skip_blank_lines=True``, so header=0 denotes the first line of
-        data rather than the first line of the file.
-    names : array-like, default None
-        List of column names to use. If file contains no header row, then you
-        should explicitly pass header=None. Duplicates in this list will cause
-        a ``UserWarning`` to be issued.
-    index_col : int or sequence or False, default None
-        Column to use as the row labels of the DataFrame. If a sequence is given, a
-        MultiIndex is used. If you have a malformed file with delimiters at the end
-        of each line, you might consider index_col=False to force pandas to _not_
-        use the first column as the index (row names)
-    usecols : array-like or callable, default None
-        Return a subset of the columns. If array-like, all elements must either
-        be positional (i.e. integer indices into the document columns) or strings
-        that correspond to column names provided either by the user in `names` or
-        inferred from the document header row(s). For example, a valid array-like
-        `usecols` parameter would be [0, 1, 2] or ['foo', 'bar', 'baz'].
-    
-        If callable, the callable function will be evaluated against the column
-        names, returning names where the callable function evaluates to True. An
-        example of a valid callable argument would be ``lambda x: x.upper() in
-        ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
-        parsing time and lower memory usage.
-    as_recarray : boolean, default False
-        .. deprecated:: 0.19.0
-           Please call `pd.read_csv(...).to_records()` instead.
-    
-        Return a NumPy recarray instead of a DataFrame after parsing the data.
-        If set to True, this option takes precedence over the `squeeze` parameter.
-        In addition, as row indices are not available in such a format, the
-        `index_col` parameter will be ignored.
-    squeeze : boolean, default False
-        If the parsed data only contains one column then return a Series
-    prefix : str, default None
-        Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
-    mangle_dupe_cols : boolean, default True
-        Duplicate columns will be specified as 'X.0'...'X.N', rather than
-        'X'...'X'. Passing in False will cause data to be overwritten if there
-        are duplicate names in the columns.
-    dtype : Type name or dict of column -> type, default None
-        Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
-        Use `str` or `object` to preserve and not interpret dtype.
-        If converters are specified, they will be applied INSTEAD
-        of dtype conversion.
-    engine : {'c', 'python'}, optional
-        Parser engine to use. The C engine is faster while the python engine is
-        currently more feature-complete.
-    converters : dict, default None
-        Dict of functions for converting values in certain columns. Keys can either
-        be integers or column labels
-    true_values : list, default None
-        Values to consider as True
-    false_values : list, default None
-        Values to consider as False
-    skipinitialspace : boolean, default False
-        Skip spaces after delimiter.
-    skiprows : list-like or integer or callable, default None
-        Line numbers to skip (0-indexed) or number of lines to skip (int)
-        at the start of the file.
-    
-        If callable, the callable function will be evaluated against the row
-        indices, returning True if the row should be skipped and False otherwise.
-        An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
-    skipfooter : int, default 0
-        Number of lines at bottom of file to skip (Unsupported with engine='c')
-    skip_footer : int, default 0
-        .. deprecated:: 0.19.0
-           Use the `skipfooter` parameter instead, as they are identical
-    nrows : int, default None
-        Number of rows of file to read. Useful for reading pieces of large files
-    na_values : scalar, str, list-like, or dict, default None
-        Additional strings to recognize as NA/NaN. If dict passed, specific
-        per-column NA values.  By default the following values are interpreted as
-        NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
-        '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan',
-        'null'.
-    keep_default_na : bool, default True
-        If na_values are specified and keep_default_na is False the default NaN
-        values are overridden, otherwise they're appended to.
-    na_filter : boolean, default True
-        Detect missing value markers (empty strings and the value of na_values). In
-        data without any NAs, passing na_filter=False can improve the performance
-        of reading a large file
-    verbose : boolean, default False
-        Indicate number of NA values placed in non-numeric columns
-    skip_blank_lines : boolean, default True
-        If True, skip over blank lines rather than interpreting as NaN values
-    parse_dates : boolean or list of ints or names or list of lists or dict, default False
-    
-        * boolean. If True -> try parsing the index.
-        * list of ints or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
-          each as a separate date column.
-        * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
-          a single date column.
-        * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result
-          'foo'
-    
-        If a column or index contains an unparseable date, the entire column or
-        index will be returned unaltered as an object data type. For non-standard
-        datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``
-    
-        Note: A fast-path exists for iso8601-formatted dates.
-    infer_datetime_format : boolean, default False
-        If True and `parse_dates` is enabled, pandas will attempt to infer the
-        format of the datetime strings in the columns, and if it can be inferred,
-        switch to a faster method of parsing them. In some cases this can increase
-        the parsing speed by 5-10x.
-    keep_date_col : boolean, default False
-        If True and `parse_dates` specifies combining multiple columns then
-        keep the original columns.
-    date_parser : function, default None
-        Function to use for converting a sequence of string columns to an array of
-        datetime instances. The default uses ``dateutil.parser.parser`` to do the
-        conversion. Pandas will try to call `date_parser` in three different ways,
-        advancing to the next if an exception occurs: 1) Pass one or more arrays
-        (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
-        string values from the columns defined by `parse_dates` into a single array
-        and pass that; and 3) call `date_parser` once for each row using one or
-        more strings (corresponding to the columns defined by `parse_dates`) as
-        arguments.
-    dayfirst : boolean, default False
-        DD/MM format dates, international and European format
-    iterator : boolean, default False
-        Return TextFileReader object for iteration or getting chunks with
-        ``get_chunk()``.
-    chunksize : int, default None
-        Return TextFileReader object for iteration.
-        See the `IO Tools docs
-        <http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
-        for more information on ``iterator`` and ``chunksize``.
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
-        For on-the-fly decompression of on-disk data. If 'infer' and
-        `filepath_or_buffer` is path-like, then detect compression from the
-        following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
-        decompression). If using 'zip', the ZIP file must contain only one data
-        file to be read in. Set to None for no decompression.
-    
-        .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression.
-    
-    thousands : str, default None
-        Thousands separator
-    decimal : str, default '.'
-        Character to recognize as decimal point (e.g. use ',' for European data).
-    float_precision : string, default None
-        Specifies which converter the C engine should use for floating-point
-        values. The options are `None` for the ordinary converter,
-        `high` for the high-precision converter, and `round_trip` for the
-        round-trip converter.
-    lineterminator : str (length 1), default None
-        Character to break file into lines. Only valid with C parser.
-    quotechar : str (length 1), optional
-        The character used to denote the start and end of a quoted item. Quoted
-        items can include the delimiter and it will be ignored.
-    quoting : int or csv.QUOTE_* instance, default 0
-        Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
-        QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
-    doublequote : boolean, default ``True``
-       When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
-       whether or not to interpret two consecutive quotechar elements INSIDE a
-       field as a single ``quotechar`` element.
-    escapechar : str (length 1), default None
-        One-character string used to escape delimiter when quoting is QUOTE_NONE.
-    comment : str, default None
-        Indicates remainder of line should not be parsed. If found at the beginning
-        of a line, the line will be ignored altogether. This parameter must be a
-        single character. Like empty lines (as long as ``skip_blank_lines=True``),
-        fully commented lines are ignored by the parameter `header` but not by
-        `skiprows`. For example, if comment='#', parsing '#empty\na,b,c\n1,2,3'
-        with `header=0` will result in 'a,b,c' being
-        treated as the header.
-    encoding : str, default None
-        Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
-        standard encodings
-        <https://docs.python.org/3/library/codecs.html#standard-encodings>`_
-    dialect : str or csv.Dialect instance, default None
-        If provided, this parameter will override values (default or not) for the
-        following parameters: `delimiter`, `doublequote`, `escapechar`,
-        `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
-        override values, a ParserWarning will be issued. See csv.Dialect
-        documentation for more details.
-    tupleize_cols : boolean, default False
-        .. deprecated:: 0.21.0
-           This argument will be removed and will always convert to MultiIndex
-    
-        Leave a list of tuples on columns as is (default is to convert to
-        a MultiIndex on the columns)
-    error_bad_lines : boolean, default True
-        Lines with too many fields (e.g. a csv line with too many commas) will by
-        default cause an exception to be raised, and no DataFrame will be returned.
-        If False, then these "bad lines" will dropped from the DataFrame that is
-        returned.
-    warn_bad_lines : boolean, default True
-        If error_bad_lines is False, and warn_bad_lines is True, a warning for each
-        "bad line" will be output.
-    low_memory : boolean, default True
-        Internally process the file in chunks, resulting in lower memory use
-        while parsing, but possibly mixed type inference.  To ensure no mixed
-        types either set False, or specify the type with the `dtype` parameter.
-        Note that the entire file is read into a single DataFrame regardless,
-        use the `chunksize` or `iterator` parameter to return the data in chunks.
-        (Only valid with C parser)
-    buffer_lines : int, default None
-        .. deprecated:: 0.19.0
-           This argument is not respected by the parser
-    compact_ints : boolean, default False
-        .. deprecated:: 0.19.0
-           Argument moved to ``pd.to_numeric``
-    
-        If compact_ints is True, then for any column that is of integer dtype,
-        the parser will attempt to cast it as the smallest integer dtype possible,
-        either signed or unsigned depending on the specification from the
-        `use_unsigned` parameter.
-    use_unsigned : boolean, default False
-        .. deprecated:: 0.19.0
-           Argument moved to ``pd.to_numeric``
-    
-        If integer columns are being compacted (i.e. `compact_ints=True`), specify
-        whether the column should be compacted to the smallest signed or unsigned
-        integer dtype.
-    memory_map : boolean, default False
-        If a filepath is provided for `filepath_or_buffer`, map the file object
-        directly onto memory and access the data directly from there. Using this
-        option can improve performance because there is no longer any I/O overhead.
-    
-    Returns
-    -------
-    result : DataFrame or TextParser
-
-
-
-
- -
-
- -
-
-
-
In [43]:
-
-
-
pd.read_csv('data/city_pop.csv')
-
- -
-
-
- -
-
- - -
-
Out[43]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This is an example CSV file
0The text at the top here is not part of the da...
1to describe the file. You'll see this quite of...
2A -1 signifies a missing value.
3year;London;Paris;Rome
42001;7.322;2.148;2.547
52006;7.652;;2.627
62008;-1;2.211;
72009;-1;2.234;2.734
82011;8.174;;
92012;-1;2.244;2.627
102015;8.615;;
-
-
- -
- -
-
- -
-
-
-
-
-
-

We can see that by default it's done a fairly bad job of parsing the file (this is mostly because I've constructed the city_pop.csv file to be as obtuse as possible). It's making a lot of assumptions about the structure of the file but in general it's taking quite a naïve approach.

-

The first thing we notice is that it's treating the text at the top of the file as though it's data. Checking the documentation we see that the simplest way to solve this is to use the skiprows argument to the function to which we give an integer giving the number of rows to skip:

- -
-
-
-
-
-
In [44]:
-
-
-
pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-)
-
- -
-
-
- -
-
- - -
-
Out[44]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
year;London;Paris;Rome
02001;7.322;2.148;2.547
12006;7.652;;2.627
22008;-1;2.211;
32009;-1;2.234;2.734
42011;8.174;;
52012;-1;2.244;2.627
62015;8.615;;
-
-
- -
- -
-
- -
-
-
-
-
-
-

The next most obvious problem is that it is not separating the columns at all. This is controlled by the sep argument which is set to ',' by default (hence comma separated values). We can simply set it to the appropriate semi-colon:

- -
-
-
-
-
-
In [45]:
-
-
-
pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-    sep=';'
-)
-
- -
-
-
- -
-
- - -
-
Out[45]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
yearLondonParisRome
020017.3222.1482.547
120067.652NaN2.627
22008-1.0002.211NaN
32009-1.0002.2342.734
420118.174NaNNaN
52012-1.0002.2442.627
620158.615NaNNaN
-
-
- -
- -
-
- -
-
-
-
-
-
-

Reading the descriptive header of our data file we see that a value of -1 signifies a missing reading so we should mark those too. This can be done after the fact but it is simplest to do it at import-time using the na_values argument:

- -
-
-
-
-
-
In [46]:
-
-
-
pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-    sep=';',
-    na_values='-1'
-)
-
- -
-
-
- -
-
- - -
-
Out[46]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
yearLondonParisRome
020017.3222.1482.547
120067.652NaN2.627
22008NaN2.211NaN
32009NaN2.2342.734
420118.174NaNNaN
52012NaN2.2442.627
620158.615NaNNaN
-
-
- -
- -
-
- -
-
-
-
-
-
-

The last thing we want to do is use the year column as the index for the DataFrame. This can be done by passing the name of the column to the index_col argument:

- -
-
-
-
-
-
In [47]:
-
-
-
df3 = pd.read_csv(
-    'data/city_pop.csv',
-    skiprows=5,
-    sep=';',
-    na_values='-1',
-    index_col='year'
-)
-df3
-
- -
-
-
- -
-
- - -
-
Out[47]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LondonParisRome
year
20017.3222.1482.547
20067.652NaN2.627
2008NaN2.211NaN
2009NaN2.2342.734
20118.174NaNNaN
2012NaN2.2442.627
20158.615NaNNaN
-
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 4

    -
  • Alongside data/city_pop.csv there is another file called data/cetml1659on.dat. This contains some historical weather data for a location in the UK. Import that file as a Pandas DataFrame using read_csv(), making sure that you cover all the NaN values.
  • -
  • How many years had a negative average temperature in January?
  • -
  • What was the average temperature in June over the years in the data set? Tip: look in the documentation for which method to call.
  • -
-

We will come back to this data set in a later stage.

- -
-
-
-
-
-
In [48]:
-
-
-
# Answer
-
-weather = pd.read_csv(
-    'data/cetml1659on.dat',  # file name
-    skiprows=6,  # skip header
-    sep='\s+',  # whitespace separated
-    na_values=['-99.9', '-99.99'],  # NaNs
-)
-weather.head()
-
- -
-
-
- -
-
- - -
-
Out[48]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECYEAR
16593.04.06.07.011.013.016.016.013.010.05.02.08.87
16600.04.06.09.011.014.015.016.013.010.06.05.09.10
16615.05.06.08.011.014.015.015.013.011.08.06.09.78
16625.06.06.08.011.015.015.015.013.011.06.03.09.52
16631.01.05.07.010.014.015.015.013.010.07.05.08.63
-
-
- -
- -
-
- -
-
-
-
In [49]:
-
-
-
# Answer
-
-len(weather[weather.JAN < 0])
-
- -
-
-
- -
-
- - -
-
Out[49]:
- - - -
-
20
-
- -
- -
-
- -
-
-
-
In [50]:
-
-
-
# Answer
-
-weather.JUN.mean()
-
- -
-
-
- -
-
- - -
-
Out[50]:
- - - -
-
14.325977653631282
-
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/13_basic_numpy.html b/html/answers/13_basic_numpy.html deleted file mode 100644 index 8eb3dad..0000000 --- a/html/answers/13_basic_numpy.html +++ /dev/null @@ -1,12732 +0,0 @@ - - - -13_basic_numpy - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Basic NumPy

NumPy ('Numerical Python') is the de facto standard module for doing numerical work in Python. Its main feature is its array data type which allows very compact and efficient storage of homogeneous (of the same type) data.

-

A lot of the material in this section is based on SciPy Lecture Notes (CC-by 4.0).

-

As you go through this material, you'll likely find it useful to refer to the NumPy documentation, particularly the array objects section.

-

As with pandas there is a standard convention for importing numpy, and that is as np:

- -
-
-
-
-
-
In [1]:
-
-
-
import numpy as np
-
- -
-
-
- -
-
-
-
-
-
-

Now that we have access to the numpy package we can start using its features.

-

Creating arrays

In many ways a NumPy array can be treated like a standard Python list and much of the way you interact with it is identical. Given a list, you can create an array as follows:

- -
-
-
-
-
-
In [2]:
-
-
-
python_list = [1, 2, 3, 4, 5, 6, 7, 8]
-numpy_array = np.array(python_list)
-print(numpy_array)
-
- -
-
-
- -
-
- - -
-
- -
-
[1 2 3 4 5 6 7 8]
-
-
-
- -
-
- -
-
-
-
In [3]:
-
-
-
# ndim give the number of dimensions
-numpy_array.ndim
-
- -
-
-
- -
-
- - -
-
Out[3]:
- - - -
-
1
-
- -
- -
-
- -
-
-
-
In [4]:
-
-
-
# the shape of an array is a tuple of its length in each dimension. In this case it is only 1-dimensional
-numpy_array.shape
-
- -
-
-
- -
-
- - -
-
Out[4]:
- - - -
-
(8,)
-
- -
- -
-
- -
-
-
-
In [5]:
-
-
-
# as in standard Python, len() gives a sensible answer
-len(numpy_array)
-
- -
-
-
- -
-
- - -
-
Out[5]:
- - - -
-
8
-
- -
- -
-
- -
-
-
-
In [6]:
-
-
-
nested_list = [[1, 2, 3], [4, 5, 6]]
-two_dim_array = np.array(nested_list)
-print(two_dim_array)
-
- -
-
-
- -
-
- - -
-
- -
-
[[1 2 3]
- [4 5 6]]
-
-
-
- -
-
- -
-
-
-
In [7]:
-
-
-
two_dim_array.ndim
-
- -
-
-
- -
-
- - -
-
Out[7]:
- - - -
-
2
-
- -
- -
-
- -
-
-
-
In [8]:
-
-
-
two_dim_array.shape
-
- -
-
-
- -
-
- - -
-
Out[8]:
- - - -
-
(2, 3)
-
- -
- -
-
- -
-
-
-
-
-
-

It's very common when working with data to not have it already in a Python list but rather to want to create some data from scratch. numpy comes with a whole suite of functions for creating arrays. We will now run through some of the most commonly used.

- -
-
-
-
-
-
-
-
-

The first is np.arange (meaning "array range") which works in a very similar fashion to the standard Python range() function, including how it defaults to starting from zero, doesn't include the number at the top of the range and how it allows you to specify a 'step':

- -
-
-
-
-
-
In [9]:
-
-
-
np.arange(10) #0 .. n-1  (!)
-
- -
-
-
- -
-
- - -
-
Out[9]:
- - - -
-
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-
- -
- -
-
- -
-
-
-
In [10]:
-
-
-
np.arange(1, 9, 2) # start, end (exclusive), step
-
- -
-
-
- -
-
- - -
-
Out[10]:
- - - -
-
array([1, 3, 5, 7])
-
- -
- -
-
- -
-
-
-
-
-
-

Next up is np.linspace (meaning "linear space") which generates a given number of floating point numbers starting from the first argument up to the second argument. The third argument defines how many numbers to create:

- -
-
-
-
-
-
In [11]:
-
-
-
np.linspace(0, 1, 6)   # start, end, num-points
-
- -
-
-
- -
-
- - -
-
Out[11]:
- - - -
-
array([ 0. ,  0.2,  0.4,  0.6,  0.8,  1. ])
-
- -
- -
-
- -
-
-
-
-
-
-

Note how it included the end point unlike arange(). You can change this feature by using the endpoint argument:

- -
-
-
-
-
-
In [12]:
-
-
-
np.linspace(0, 1, 5, endpoint=False)
-
- -
-
-
- -
-
- - -
-
Out[12]:
- - - -
-
array([ 0. ,  0.2,  0.4,  0.6,  0.8])
-
- -
- -
-
- -
-
-
-
-
-
-

np.ones creates an n-dimensional array filled with the value 1.0. The argument you give to the function defines the shape of the array:

- -
-
-
-
-
-
In [13]:
-
-
-
np.ones((3, 3))  # reminder: (3, 3) is a tuple
-
- -
-
-
- -
-
- - -
-
Out[13]:
- - - -
-
array([[ 1.,  1.,  1.],
-       [ 1.,  1.,  1.],
-       [ 1.,  1.,  1.]])
-
- -
- -
-
- -
-
-
-
-
-
-

Likewise, you can create an array of any size filled with zeros:

- -
-
-
-
-
-
In [14]:
-
-
-
np.zeros((2, 2))
-
- -
-
-
- -
-
- - -
-
Out[14]:
- - - -
-
array([[ 0.,  0.],
-       [ 0.,  0.]])
-
- -
- -
-
- -
-
-
-
-
-
-

The np.eye (referring to the mathematical identity matrix, commonly labelled as I) creates a square matrix of a given size with 1.0 on the diagonal and 0.0 elsewhere:

- -
-
-
-
-
-
In [15]:
-
-
-
np.eye(3)
-
- -
-
-
- -
-
- - -
-
Out[15]:
- - - -
-
array([[ 1.,  0.,  0.],
-       [ 0.,  1.,  0.],
-       [ 0.,  0.,  1.]])
-
- -
- -
-
- -
-
-
-
-
-
-

The np.diag creates a square matrix with the given values on the diagonal and 0.0 elsewhere:

- -
-
-
-
-
-
In [16]:
-
-
-
np.diag([1, 2, 3, 4])
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
array([[1, 0, 0, 0],
-       [0, 2, 0, 0],
-       [0, 0, 3, 0],
-       [0, 0, 0, 4]])
-
- -
- -
-
- -
-
-
-
-
-
-

Finally, you can fill an array with random numbers:

- -
-
-
-
-
-
In [17]:
-
-
-
np.random.rand(4)  # uniform in [0, 1]
-
- -
-
-
- -
-
- - -
-
Out[17]:
- - - -
-
array([ 0.10916155,  0.45514451,  0.4002642 ,  0.09294576])
-
- -
- -
-
- -
-
-
-
In [18]:
-
-
-
np.random.randn(4)  # Gaussian
-
- -
-
-
- -
-
- - -
-
Out[18]:
- - - -
-
array([-0.91134171, -0.25132784,  0.29975967,  0.05950522])
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 5

    -
  • Experiment with arange, linspace, ones, zeros, eye and diag.
  • -
  • Create different kinds of arrays with random numbers.
  • -
  • Look at the function np.empty. What does it do? When might this be useful?
  • -
- -
-
-
-
-
-
-
-
-

Reshaping arrays

Behind the scenes, a multi-dimensional NumPy array is just stored as a linear segment of memory. The fact that it is presented as having more than one dimension is simply a layer on top of that (sometimes called a view). This means that we can simply change that interpretive layer and change the shape of an array very quickly (i.e. without NumPy having to copy any data around).

-

This is mostly done with the reshape() method on the array object:

- -
-
-
-
-
-
In [19]:
-
-
-
my_array = np.arange(16)
-my_array
-
- -
-
-
- -
-
- - -
-
Out[19]:
- - - -
-
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
-
- -
- -
-
- -
-
-
-
In [20]:
-
-
-
my_array.shape
-
- -
-
-
- -
-
- - -
-
Out[20]:
- - - -
-
(16,)
-
- -
- -
-
- -
-
-
-
In [21]:
-
-
-
my_array.reshape((2, 8))
-
- -
-
-
- -
-
- - -
-
Out[21]:
- - - -
-
array([[ 0,  1,  2,  3,  4,  5,  6,  7],
-       [ 8,  9, 10, 11, 12, 13, 14, 15]])
-
- -
- -
-
- -
-
-
-
In [22]:
-
-
-
my_array.reshape((4, 4))
-
- -
-
-
- -
-
- - -
-
Out[22]:
- - - -
-
array([[ 0,  1,  2,  3],
-       [ 4,  5,  6,  7],
-       [ 8,  9, 10, 11],
-       [12, 13, 14, 15]])
-
- -
- -
-
- -
-
-
-
-
-
-

Note that if you check, my_array.shape will still return (16,) as reshaped is simply a view on the original data, it hasn't actually changed it. If you want to edit the original object in-place then you can use the resize() method.

-

You can also transpose an array using the transpose() method which mirrors the array along its diagonal:

- -
-
-
-
-
-
In [23]:
-
-
-
my_array.reshape((2, 8)).transpose()
-
- -
-
-
- -
-
- - -
-
Out[23]:
- - - -
-
array([[ 0,  8],
-       [ 1,  9],
-       [ 2, 10],
-       [ 3, 11],
-       [ 4, 12],
-       [ 5, 13],
-       [ 6, 14],
-       [ 7, 15]])
-
- -
- -
-
- -
-
-
-
In [24]:
-
-
-
my_array.reshape((4,4)).transpose()
-
- -
-
-
- -
-
- - -
-
Out[24]:
- - - -
-
array([[ 0,  4,  8, 12],
-       [ 1,  5,  9, 13],
-       [ 2,  6, 10, 14],
-       [ 3,  7, 11, 15]])
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 6

Using the NumPy documentation at https://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html, create, in one line, a NumPy array which looks like:

-
[10,  60,  20,  70,  30,  80,  40,  90,  50, 100]
-
-

Hint: you will need to use transpose(), reshape() and arange() as well as one new function from the "Shape manipulation" section of the documentation. Can you find a method which uses less than 4 function calls?

- -
-
-
-
-
-
In [25]:
-
-
-
# Answer
-
-np.arange(10, 110, 10).reshape(2, 5).transpose().ravel()
-
- -
-
-
- -
-
- - -
-
Out[25]:
- - - -
-
array([ 10,  60,  20,  70,  30,  80,  40,  90,  50, 100])
-
- -
- -
-
- -
-
-
- - - - - - diff --git a/html/answers/14_more_numpy.html b/html/answers/14_more_numpy.html deleted file mode 100644 index 1f36a26..0000000 --- a/html/answers/14_more_numpy.html +++ /dev/null @@ -1,12530 +0,0 @@ - - - -14_more_numpy - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

More Numpy

Carrying on from the last lesson we will continue learning how to manipulate data in numpy before using matplotlib to plot our data.

- -
-
-
-
-
-
In [1]:
-
-
-
import numpy as np
-
- -
-
-
- -
-
-
-
-
-
-

Basic data types

You may have noticed that, in some instances, array elements are displayed with a trailing dot (e.g. 2. vs 2). This is due to a difference in the data-type used:

- -
-
-
-
-
-
In [2]:
-
-
-
a = np.array([1, 2, 3])
-a.dtype
-
- -
-
-
- -
-
- - -
-
Out[2]:
- - - -
-
dtype('int64')
-
- -
- -
-
- -
-
-
-
In [3]:
-
-
-
b = np.array([1., 2., 3.])
-b.dtype
-
- -
-
-
- -
-
- - -
-
Out[3]:
- - - -
-
dtype('float64')
-
- -
- -
-
- -
-
-
-
-
-
-

Different data-types allow us to store data more compactly in memory, but most of the time we simply work with floating point numbers. Note that, in the example above, NumPy auto-detects the data-type from the input but you can specify it explicitly:

- -
-
-
-
-
-
In [4]:
-
-
-
c = np.array([1, 2, 3], dtype=float)
-c.dtype
-
- -
-
-
- -
-
- - -
-
Out[4]:
- - - -
-
dtype('float64')
-
- -
- -
-
- -
-
-
-
-
-
-

The default data type is floating point.

- -
-
-
-
-
-
In [5]:
-
-
-
d = np.ones((3, 3))
-d.dtype
-
- -
-
-
- -
-
- - -
-
Out[5]:
- - - -
-
dtype('float64')
-
- -
- -
-
- -
-
-
-
-
-
-

There are other data types as well:

- -
-
-
-
-
-
In [6]:
-
-
-
e = np.array([1+2j, 3+4j, 5+6*1j])
-type(1j)
-#e.dtype
-
- -
-
-
- -
-
- - -
-
Out[6]:
- - - -
-
complex
-
- -
- -
-
- -
-
-
-
In [7]:
-
-
-
f = np.array([True, False, False, True])
-f.dtype
-
- -
-
-
- -
-
- - -
-
Out[7]:
- - - -
-
dtype('bool')
-
- -
- -
-
- -
-
-
-
In [8]:
-
-
-
g = np.array(['Bonjour', 'Hello', 'Hallo',])
-g.dtype     # <--- strings containing max. 7 letters
-
- -
-
-
- -
-
- - -
-
Out[8]:
- - - -
-
dtype('<U7')
-
- -
- -
-
- -
-
-
-
-
-
-

We previously came across dtypes when learning about pandas. This is because pandas uses NumPy as its underlying library. A pandas.Series is essentially a np.array with some extra features wrapped around it.

- -
-
-
-
-
-
-
-
-

Exercise 1

Recreate some of the arrays we created in yesterday's session and look at what dtype they have.

- -
-
-
-
-
-
-
-
-

Why NumPy

To show some of the advantages of NumPy over a standard Python list, let's do some benchmarking. It's an important habit in programming that whenever you think one method may be faster than another, you check to see whether your assumption is true.

-

Python provides some tools to make this easier, particularly via the timeit module. Using this functionality, IPython provides a %timeit magic function to make our life easier. To use the %timeit magic, simply put it at the beginning of a line and it will give you information about how long it took to run. It doesn't always work as you would expect so to make your life easier, put whatever code you want to benchmark inside a function and time that function call.

-

We start by making a list and an array of 100000 items each of values counting from 0 to 99999:

- -
-
-
-
-
-
In [9]:
-
-
-
python_list = list(range(100000))
-numpy_array = np.arange(100000)
-
- -
-
-
- -
-
-
-
-
-
-

We are going to go through each item in the list and double its value in-place, such that the list is changed after the operation. To do this with a Python list we need a for loop:

- -
-
-
-
-
-
In [10]:
-
-
-
def python_double(a):
-    for i, val in enumerate(a):
-        a[i] = val * 2
-
-%timeit python_double(python_list)
-
- -
-
-
- -
-
- - -
-
- -
-
16.1 ms ± 1.18 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
-
-
-
- -
-
- -
-
-
-
-
-
-

To do the same operation in NumPy we can use the fact that multiplying a NumPy array by a value will apply that operation to each of its elements:

- -
-
-
-
-
-
In [11]:
-
-
-
def numpy_double(a):
-    a *= 2
-
-%timeit numpy_double(numpy_array)
-
- -
-
-
- -
-
- - -
-
- -
-
59.8 µs ± 1.55 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
-
-
-
- -
-
- -
-
-
-
-
-
-

As you can see, the NumPy version is at least 10 times faster, sometimes up to 100 times faster.

-

Have a think about why this might be, what is NumPy doing to make this so much faster? There are two main parts to the answer.

- -
-
-
-
-
-
-
-
-

Copies and views

A slicing operation (like reshaping before) creates a view on the original array, which is just a way of accessing array data. Thus the original array is not copied in memory. This means you can do this to large arrays without any great performance hit. You can use np.may_share_memory() to check if two arrays share the same memory block. Note however, that this uses heuristics and may give you false positives.

-

When modifying the view, the original array is modified as well:

- -
-
-
-
-
-
In [12]:
-
-
-
a = np.arange(10)
-a
-
- -
-
-
- -
-
- - -
-
Out[12]:
- - - -
-
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-
- -
- -
-
- -
-
-
-
In [13]:
-
-
-
b = a[3:7]
-
-np.may_share_memory(a, b)
-
- -
-
-
- -
-
- - -
-
Out[13]:
- - - -
-
True
-
- -
- -
-
- -
-
-
-
In [14]:
-
-
-
b[0] = 12
-b
-
- -
-
-
- -
-
- - -
-
Out[14]:
- - - -
-
array([12,  4,  5,  6])
-
- -
- -
-
- -
-
-
-
In [15]:
-
-
-
a   # (!)
-
- -
-
-
- -
-
- - -
-
Out[15]:
- - - -
-
array([ 0,  1,  2, 12,  4,  5,  6,  7,  8,  9])
-
- -
- -
-
- -
-
-
-
In [16]:
-
-
-
a = np.arange(10)
-c = a[::2].copy()  # force a copy
-c[0] = 12
-a
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-
- -
- -
-
- -
-
-
-
In [17]:
-
-
-
np.may_share_memory(a, c)  # we made a copy so there is no shared memory
-
- -
-
-
- -
-
- - -
-
Out[17]:
- - - -
-
False
-
- -
- -
-
- -
-
-
-
-
-
-

Whether you make a view or a copy can affect the speed of your code significantly. Be in the habit of checking whether your code is doing unnecessary work. Also, be sure to benchmark your code as you work on it so that you notice any slowdowns and so that you know which parts are slow so you speed the right bits up.

- -
-
-
-
-
-
-
-
-

Exercise 2

    -
  • Using %timeit, time how long finding the square roots of a list of numbers would take under both standard Python and numpy.
      -
    • Tip: Python's square root function is math.sqrt. numpy's is np.sqrt.
    • -
    -
  • -
- -
-
-
-
-
-
In [18]:
-
-
-
# Answer
-
-import math
-
-python_list_2 = list(range(100000))
-
-def python_sqrt(a):
-    for i, val in enumerate(a):
-        a[i] = math.sqrt(val)
-
-%timeit python_sqrt(python_list)
-
- -
-
-
- -
-
- - -
-
- -
-
20.7 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
-
-
-
- -
-
- -
-
-
-
In [19]:
-
-
-
# Answer
-
-numpy_array_2 = np.arange(100000)
-
-def numpy_sqrt(a):
-    np.sqrt(a)
-
-%timeit numpy_sqrt(numpy_array)
-
- -
-
-
- -
-
- - -
-
- -
-
159 µs ± 2.91 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
-
-
-
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/15_matplotlib.html b/html/answers/15_matplotlib.html deleted file mode 100644 index a11ce05..0000000 --- a/html/answers/15_matplotlib.html +++ /dev/null @@ -1,30408 +0,0 @@ - - - -15_matplotlib - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Plotting data with matplotlib

Plotting of data in pandas is handled by an external Python module called matplotlib. Like pandas it is a large library and has a venerable history (first released in 2003) and so we couldn't hope to cover all its functionality in this course. To see the wide range of possibilities you have with matplotlib see its example gallery.

-

Here we will cover the basic uses of it and how it integrates with pandas. While working through these examples you will likely find it very useful to refer to the matplotlib documentation.

- -
-
-
-
-
-
-
-
-

First we import pandas and numpy in the same way as we did previously.

- -
-
-
-
-
-
In [1]:
-
-
-
import numpy as np
-import pandas as pd
-from pandas import Series, DataFrame
-
- -
-
-
- -
-
-
-
-
-
-

Some matplotlib functionality is provided directly through pandas (such as the plot() method as we will see) but for much of it you need to import the matplotlib interface itself.

-

The most common interface to matplotlib is its pyplot module which provides a way to affect the current state of matplotlib directly. By convention this is imported as plt.

-

We also set the figure format to be SVG so that the plots look a little nicer in our Jupyter notebook.

- -
-
-
-
-
-
In [2]:
-
-
-
import matplotlib.pyplot as plt
-%config InlineBackend.figure_format = 'svg'
-
- -
-
-
- -
-
-
-
-
-
-

Once we have imported matplotlib we can start calling its functions. Any functions called on the plt object will affect all of matplotlib from that point on in the script.

- -
-
-
-
-
-
-
-
-

We first need to import some data to plot. Let's start with the data from the pandas section (available from cetml1659on.dat) and import it into a DataFrame:

- -
-
-
-
-
-
In [3]:
-
-
-
df = pd.read_csv(
-    'data/cetml1659on.dat',  # file name
-    skiprows=6,  # skip header
-    sep='\s+',  # whitespace separated
-    na_values=['-99.9', '-99.99'],  # NaNs
-)
-df.head()
-
- -
-
-
- -
-
- - -
-
Out[3]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECYEAR
16593.04.06.07.011.013.016.016.013.010.05.02.08.87
16600.04.06.09.011.014.015.016.013.010.06.05.09.10
16615.05.06.08.011.014.015.015.013.011.08.06.09.78
16625.06.06.08.011.015.015.015.013.011.06.03.09.52
16631.01.05.07.010.014.015.015.013.010.07.05.08.63
-
-
- -
- -
-
- -
-
-
-
-
-
-

Pandas integrates matplotlib directly into itself so any dataframe can be plotted easily, simply by calling the plot() method on one of the columns. This creates a plot object which you can then edit and alter, for example by setting the axis labels using the plt.ylabel() function before displaying it with plt.show().

-

Matplotlib operates on a single global state and calling any function on plt will alter that state. Calling df.plot() sets the currently operating plot. plt.ylabel() then alters that state and plt.show() displays it.

- -
-
-
-
-
-
In [4]:
-
-
-
df['JAN'].plot()
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Exercise 1

    -
  • Make sure you can reproduce the plot above. Try tweaking the labels or which column is plotted.
  • -
  • Try putting in two plot() calls with different months (January and July for example) before calling show().
  • -
- -
-
-
-
-
-
In [5]:
-
-
-
# Answer
-
-df['JUL'].plot()
-df['JAN'].plot()
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Making it prettier

-
-
-
-
-
-
-
-
-

While it's useful to be able to quickly plot any data we have in front of us, matplotlib's power comes from its configurability. Let's experiment with a dataset and see how much we can change the plot.

-

We'll start with a simple DataFrame containing two columns, one with the values of a cosine, the other with the values of a sine.

- -
-
-
-
-
-
In [6]:
-
-
-
X = np.linspace(-np.pi, np.pi, 256, endpoint=True)
-data = {'cos': np.cos(X), 'sin': np.sin(X)}
-trig = DataFrame(index=X, data=data)
-
-trig.plot()
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

You can see that it has plotted the sine and cosine curves between $\pi$ and $-\pi$. Now, let's go through and see how we can affect the display of this plot.

- -
-
-
-
-
-
-
-
-

Changing colours and line widths

First step, we want to have the cosine in blue and the sine in red and a slightly thicker line for both of them.

- -
-
-
-
-
-
In [7]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

    -
  • Using the temperature dataset, set the colours of the July and January lines to a warm colour and a cool colour.
  • -
  • Add in the yearly average column to the plot with a dashed line style.
  • -
- -
-
-
-
-
-
In [8]:
-
-
-
# Answer
-
-df['JUL'].plot(color='orange')
-df['JAN'].plot(color='blue')
-df['YEAR'].plot(linestyle=':')
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.legend(loc='upper left')
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Setting limits

Current limits of the figure are a bit too tight and we want to make some space in order to clearly see all data points.

- -
-
-
-
-
-
In [9]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-### New code
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Setting ticks

Current ticks are not ideal because they do not show the interesting values ($\pm\pi$,$\pm\frac{\pi}{2}$) for sine and cosine. We’ll change them such that they show only these values.

- -
-
-
-
-
-
In [10]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-### New code
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Setting tick labels

Ticks are now properly placed but their label is not very explicit. We could guess that 3.142 is $\pi$ but it would be better to make it explicit. When we set tick values, we can also provide a corresponding label in the second argument list. Note that we’ll use LaTeX to allow for nice rendering of the label.

- -
-
-
-
-
-
In [11]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-### New code
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Moving spines

Spines are the lines connecting the axis tick marks and noting the boundaries of the data area. They can be placed at arbitrary positions and until now, they were on the border of the axis. We’ll change that since we want to have them in the middle. Since there are four of them (top/bottom/left/right), we’ll discard the top and right by setting their color to none and we’ll move the bottom and left ones to coordinate 0 in data space coordinates.

- -
-
-
-
-
-
In [12]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-
-### New code
-ax = plt.gca()  # gca stands for 'get current axis'
-ax.spines['right'].set_color('none')
-ax.spines['top'].set_color('none')
-ax.xaxis.set_ticks_position('bottom')
-ax.spines['bottom'].set_position(('data',0))
-ax.yaxis.set_ticks_position('left')
-ax.spines['left'].set_position(('data',0))
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Adding a legend

Let’s add a legend in the upper left corner. This only requires adding the keyword argument label (that will be used in the legend box) to the plot commands.

- -
-
-
-
-
-
In [13]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-
-ax = plt.gca()  # gca stands for 'get current axis'
-ax.spines['right'].set_color('none')
-ax.spines['top'].set_color('none')
-ax.xaxis.set_ticks_position('bottom')
-ax.spines['bottom'].set_position(('data',0))
-ax.yaxis.set_ticks_position('left')
-ax.spines['left'].set_position(('data',0))
-
-### New code
-plt.legend(loc='upper left')
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Annotate some points

Let’s annotate some interesting points using the annotate command. We chose the $\frac{2}{3}\pi$ value and we want to annotate both the sine and the cosine. We’ll first draw a marker on the curve as well as a straight dotted line. Then, we’ll use the annotate command to display some text with an arrow.

- -
-
-
-
-
-
In [14]:
-
-
-
trig.cos.plot(color="blue", linewidth=2.5, linestyle="-")
-trig.sin.plot(color="red", linewidth=2.5, linestyle="-")
-
-plt.xlim(trig.index.min() * 1.1, trig.index.max() * 1.1)
-plt.ylim(trig.cos.min() * 1.1, trig.cos.max() * 1.1)
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
-plt.yticks([-1, 0, +1])
-
-plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
-           [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
-
-plt.yticks([-1, 0, +1],
-           [r'$-1$', r'$0$', r'$+1$'])
-
-ax = plt.gca()  # gca stands for 'get current axis'
-ax.spines['right'].set_color('none')
-ax.spines['top'].set_color('none')
-ax.xaxis.set_ticks_position('bottom')
-ax.spines['bottom'].set_position(('data',0))
-ax.yaxis.set_ticks_position('left')
-ax.spines['left'].set_position(('data',0))
-
-plt.legend(loc='upper left')
-
-### New code
-t = 2 * np.pi / 3
-plt.plot([t, t], [0, np.cos(t)], color='blue', linewidth=2.5, linestyle="--")
-plt.scatter([t, ], [np.cos(t), ], 50, color='blue')
-
-plt.annotate(r'$cos(\frac{2\pi}{3})=-\frac{1}{2}$',
-             xy=(t, np.cos(t)), xycoords='data',
-             xytext=(-90, -50), textcoords='offset points', fontsize=16,
-             arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
-
-plt.plot([t, t],[0, np.sin(t)], color='red', linewidth=2.5, linestyle="--")
-plt.scatter([t, ],[np.sin(t), ], 50, color='red')
-
-plt.annotate(r'$sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$',
-             xy=(t, np.sin(t)), xycoords='data',
-             xytext=(+10, +30), textcoords='offset points', fontsize=16,
-             arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
-### End of new code
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Now that you know how to make different modifications to your plots, we can make some of these changes to our temperature data.

- -
-
-
-
-
-
-
-
-

Saving plot to a file

You can take any plot you've created within Jupyter and save it to a file on disk using the plt.savefig() function. You give the function the name of the file to create and it will use whatever format is specified by the name.

- -
-
-
-
-
-
In [15]:
-
-
-
trig.plot()
-
-plt.show()
-
-plt.savefig('my_fig.svg')
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- - - -
-
<matplotlib.figure.Figure at 0x7f2ac765ef98>
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 3

    -
  • Add in a legend for the data.
  • -
  • Add an annotation to one of the spikes in the data. Make sure the label is placed nicely.
      -
    • Tip: you can get the year and temperature for a spike using:
      warm_winter_year = df['JAN'].idxmax()
      -warm_winter_temp = df['JAN'].max()
      -
      -
    • -
    -
  • -
  • Save the figure to a file and display it in your Jupyter notebook.
  • -
- -
-
-
-
-
-
In [16]:
-
-
-
# Answer
-
-df['JUL'].plot(color='orange')
-df['JAN'].plot(color='blue')
-df['YEAR'].plot(linestyle=':')
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.legend(loc='upper left')
-
-warm_winter_year = df['JAN'].idxmax()
-warm_winter_temp = df['JAN'].max()
-
-plt.annotate(r'A warm winter',
-             xy=(warm_winter_year, warm_winter_temp),
-             xytext=(-30, +30), textcoords='offset points', fontsize=14,
-             arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Bar charts

Of course, Matplotlib can plot more than just line graphs. One of the other most common plot types is a bar chart. Let's work towards plotting a bar chart of the average temperature per decade.

-

Let's start by adding a new column to the data frame which represents the decade. We create it by taking the index (which is a list of years), converting each element to a string and then replacing the fourth character with a '0'.

- -
-
-
-
-
-
In [17]:
-
-
-
years = Series(df.index, index=df.index).apply(str)
-decade = years.apply(lambda x: x[:3]+'0')
-
-df['decade'] = decade
-df.head()
-
- -
-
-
- -
-
- - -
-
Out[17]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECYEARdecade
16593.04.06.07.011.013.016.016.013.010.05.02.08.871650
16600.04.06.09.011.014.015.016.013.010.06.05.09.101660
16615.05.06.08.011.014.015.015.013.011.08.06.09.781660
16625.06.06.08.011.015.015.015.013.011.06.03.09.521660
16631.01.05.07.010.014.015.015.013.010.07.05.08.631660
-
-
- -
- -
-
- -
-
-
-
-
-
-

Once we have our decade column, we can use Pandas groupby() function to gather our data by decade and then aggregate it by taking the mean of each decade.

- -
-
-
-
-
-
In [18]:
-
-
-
by_decade = df.groupby('decade')
-agg = by_decade.aggregate(np.mean)
-
-agg.head()
-
- -
-
-
- -
-
- - -
-
Out[18]:
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECYEAR
decade
16503.004.006.007.0011.0013.0016.0016.0013.0010.005.002.008.870
16602.604.005.107.7010.6014.5016.0015.7013.3010.006.303.809.157
16703.252.354.507.2511.0514.4015.8015.2512.408.955.202.458.607
16802.502.804.807.4011.4514.0015.4514.9012.709.555.454.058.785
16901.892.493.996.799.6013.4415.2714.6511.938.645.263.318.134
-
-
- -
- -
-
- -
-
-
-
-
-
-

At this point, agg is a standard Pandas DataFrame so we can plot it like any other, by putting .bar after the plot call:

- -
-
-
-
-
-
In [19]:
-
-
-
agg.YEAR.plot.bar()
-
-plt.ylabel(r'Temperature ($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
-
-
-

Exercise 4

    -
  1. Plot a bar chart of the average temperature per century.

    -
      -
    • Set the limits of the y-axis to zoom in on the data.
    • -
    -
  2. -
  3. Plot a histogram of the average annual temperature

    -
      -
    • Make sure that the x-axis is labelled correctly.
    • -
    • Tip: Look in the documentation for the right command to run
    • -
    -
  4. -
  5. Plot a scatter plot of each year's February temperature plotted against that year's January temperature. Is there an obvious correlation?

    -
  6. -
- -
-
-
-
-
-
In [20]:
-
-
-
# Answer
-
-years = Series(df.index, index=df.index).apply(str)
-century = years.apply(lambda x: x[:2]+'00')
-
-df['century'] = century
-
-by_century = df.groupby('century')
-century_avg = by_century.agg(np.mean)
-
-
-century_avg.YEAR.plot.bar()
-
-plt.xlabel(r'Century')
-plt.ylabel(r'Average yearly temperature ($^\circ$C)')
-plt.ylim(8, 10.5)
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
In [21]:
-
-
-
# Answer
-
-df.YEAR.plot.hist()
-
-plt.xlabel(r'Temperature ($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
In [22]:
-
-
-
# Answer
-
-df.plot.scatter(x='JAN', y='FEB')
-
-plt.xlabel(r'Temperature in January($^\circ$C)')
-plt.ylabel(r'Temperature in February($^\circ$C)')
-
-plt.show()
-
- -
-
-
- -
-
- - -
-
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/16_viewing_molecules.html b/html/answers/16_viewing_molecules.html deleted file mode 100644 index 892f537..0000000 --- a/html/answers/16_viewing_molecules.html +++ /dev/null @@ -1,12553 +0,0 @@ - - - -16_viewing_molecules - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Viewing Molecules

nglview is an extremely powerful and capable 3D molecule viewer that runs within a web browser. It supports complex visualisations of molecules from a range of file formats, and can even be used to view trajectories. It provides a full framework for building 3D molecular visualisation into your Jupyter notebooks or websites.

-

While nglview is very powerful, that power and flexibility can be a little daunting for newcomers. BioSimSpace is a project that provides easy-to-use wrappers around common molecular simulation tasks. One such task is viewing molecules. BioSimSpace provides the function viewMolecules that uses nglview to do exactly that :-)

- -
-
-
-
-
-
In [1]:
-
-
-
from BioSimSpace import viewMolecules
-v = viewMolecules("data/dioxin.pdb")
-
- -
-
-
- -
-
- - -
-
- -
-
Reading molecules from '['data/dioxin.pdb']'
-Rendering the molecules...
-
-
-
- -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

The above code has used the molecule file parsers built into BioSimSpace to load the molecule contained in dioxin.pdb. This is then rendered using nglview. The above nglview interface allows you to rotate the molecule (left click and drag), zoom in and out (pinch or scroll up or down) and translate (right click and drag, or control+click on a Mac).

-

Try moving and rotating the molecule. If you lose the molecule, click the "Center" button in the General tab to recenter the molecule.

-

Simple molecule view

- -
-
-
-
-
-
-
-
-

The BioSimSpace viewMolecules function has done two things:

-
    -
  • it first loaded the molecule(s) from the file,
  • -
  • and it then rendered them
  • -
-

Loading molecules can take a long time and use a lot of memory. To prevent you from having to repeatedly load molecules, the viewMolecules function has returned a view object that can be re-used. To see how to use it, use python's help...

- -
-
-
-
-
-
In [2]:
-
-
-
help(v)
-
- -
-
-
- -
-
- - -
-
- -
-
Help on View in module BioSimSpace.Notebook.view object:
-
-class View(builtins.object)
- |  A class for handling interactive molecular visualisations.
- |  
- |  Methods defined here:
- |  
- |  __init__(self, handle)
- |      Constructor.
- |      
- |      Positional arguments:
- |      
- |      handle -- A handle to a Sire.System or BioSimSpace.Process object.
- |  
- |  molecule(self, index=0, gui=True)
- |      View a specific molecule.
- |      
- |      Keyword arguments:
- |      
- |      index -- The molecule index.
- |      gui   -- Whether to display the gui.
- |  
- |  molecules(self, indices=None, gui=True)
- |      View specific molecules.
- |      
- |      Keyword arguments:
- |      
- |      indices -- A list of molecule indices.
- |      gui     -- Whether to display the gui.
- |  
- |  nViews(self)
- |      Return the number of views.
- |  
- |  reload(self, index=None, gui=True)
- |      Reload a particular view.
- |      
- |      Keyword arguments:
- |      
- |      index -- The view index.
- |      gui   -- Whether to display the gui.
- |  
- |  reset(self)
- |      Reset the object, clearing all view files.
- |  
- |  savePDB(self, file, index=None)
- |      Save a specific view as a PDB file.
- |      
- |      Positional arguments:
- |      
- |      file  -- The name of the file to write to.
- |      
- |      Keyword arguments:
- |      
- |      index -- The view index.
- |  
- |  system(self, gui=True)
- |      View the entire molecular system.
- |      
- |      Keyword arguments:
- |      
- |      gui -- Whether to display the gui.
- |  
- |  ----------------------------------------------------------------------
- |  Data descriptors defined here:
- |  
- |  __dict__
- |      dictionary for instance variables (if defined)
- |  
- |  __weakref__
- |      list of weak references to the object (if defined)
-
-
-
-
- -
-
- -
-
-
-
-
-
-

As you can see, we can use v.system() to view all of the loaded molecules again, without having to reload them.

- -
-
-
-
-
-
In [3]:
-
-
-
v.system()
-
- -
-
-
- -
-
- - -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

You can change the representation of the molecule by clicking on the "Representation" tab. First click the "Remove" icon to remove the current representation. Then click the drop-down representation box to choose another representation (e.g. "spacefill"). Then click the "Add" icon to add that representation. Experiment with adding and removing different representations.

-

Different representations

- -
-
-
-
-
-
-
-
-

Loading lots of molecules

nglview isn't just limited to viewing small molecules. It also works really well as a viewer for large molecular systems. It (sometimes) is sufficiently clever to select appropriate representations for the molecules being loaded.

-

For example, view the protein-ligand complex in data/complex.pdb

- -
-
-
-
-
-
In [4]:
-
-
-
v = viewMolecules("data/complex.pdb")
-
- -
-
-
- -
-
- - -
-
- -
-
Reading molecules from '['data/complex.pdb']'
-Rendering the molecules...
-
-
-
- -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

In this case, nglview has automatically selected a cartoon representation for the protein and a ball and stick representation for the ligand.

-

You can achieve this yourself by using selections to set different representations for different molecules (or parts of molecules). First, delete the default representations by repeatedly clicking the "Remove" button in the representations tab. Once you have removed all of them, we will add a new representation. Select the type as surface, and then type "protein" into the selection box (which starts off with a "*" in it).

-

Select protein

-

Click "Add". After some time thinking, nglview will show you a surface representation of the protein.

-

Next, add a "spacefill" representation to the ligand. The ligand residue is called "LIG", so to do this, select "spacefill", type "LIG" into the selection box, and then click add. You should now see the ligand neatly bound into the protein.

-

Select ligand

-

The selection box can be used to select proteins ("protein"), water ("water"), everything ("*") or residues by name (e.g. "LIG") or number (e.g. "35"). Play around creating different selections and representations. For example, create a "point" representation for water, a "tube" representation of the protein and a "licorice" representation of all alanine residues. Note - you can control the opacity (level of transparency) of a representation by selecting the representation in the drop down box and changing the "opacity" slider in the "Parameters" tab - see below. You can also change things like the colour scheme of the representation in this "Parameters" tab

-

Opacity

- -
-
-
-
-
-
-
-
-

Viewing individual molecules

The view object returned by BioSimSpace can be used to view specific molecules from the file. To do this, use the molecules function. This takes a list of indices of the molecules you want to view. For example, to view the first molecule (the molecule at index 0), type:

- -
-
-
-
-
-
In [5]:
-
-
-
v.molecules([0])
-
- -
-
-
- -
-
- - -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

while to view the molecules at indices 100 to 999, use the code below (noting that you may need to add a "ball and stick" representation in case nglview automatically hides the water molecules).

- -
-
-
-
-
-
In [6]:
-
-
-
v.molecules( range(100,1000) )
-
- -
-
-
- -
-
- - -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

Loading more complex files

BioSimSpace provides readers and writers for a variety of molecular file formats. Some of these split the molecular data over multiple files, e.g. a topology and coordinate file. To view these, pass all of the necessary files to viewMolecules in a list, e.g.

- -
-
-
-
-
-
In [7]:
-
-
-
v = viewMolecules(["data/ala.top","data/ala.crd"])
-
- -
-
-
- -
-
- - -
-
- -
-
Reading molecules from '['data/ala.top', 'data/ala.crd']'
-Rendering the molecules...
-
-
-
- -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

This can be combined with molecule selection. For example, to load and view only molecules 0-4 in the file, pass the indices of the molecules you want to view as a second argument to viewMolecules, e.g.

- -
-
-
-
-
-
In [8]:
-
-
-
v = viewMolecules(["data/ala.top","data/ala.crd"], [0,1,2,3,4])
-
- -
-
-
- -
-
- - -
-
- -
-
Reading molecules from '['data/ala.top', 'data/ala.crd']'
-Rendering the molecules...
-
-
-
- -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
-
-
-

(In reality, all molecules are loaded, but only the molecules specified by the indices are viewed. You can still use v.system() to view all molecules.)

- -
-
-
-
-
-
In [9]:
-
-
-
v.system()
-
- -
-
-
- -
-
- - -
-
- - - - - - -
-
- - -
- -
- -
-
- - - - - - -
-
- - -
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/17_regular_expressions.html b/html/answers/17_regular_expressions.html deleted file mode 100644 index c0d46f6..0000000 --- a/html/answers/17_regular_expressions.html +++ /dev/null @@ -1,12816 +0,0 @@ - - - -17_regular_expressions - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-
-
-

Regular Expressions

In the error handling session we tried to interpret strings as valid heights and weights. This involved looking for text such as "meter" or "kilogram" in the string, and then extracting the number. This process is called pattern matching, and is best undertaken using a regular expression.

-

Regular expressions have a long history and are available in most programming languages. Python implements a standards-compliant regular expression module, which is called re.

- -
-
-
-
-
-
In [1]:
-
-
-
import re
-
- -
-
-
- -
-
-
-
-
-
-

Let's create a string that contains a height and see if we can use a regular expression to match that...

- -
-
-
-
-
-
In [2]:
-
-
-
h = "2 meters"
-
- -
-
-
- -
-
-
-
-
-
-

To search for the string "meters" within another string, use re.search, e.g.

- -
-
-
-
-
-
In [3]:
-
-
-
if re.search("meters", h):
-    print("String contains 'meters'")
-else:
-    print("No match")
-
- -
-
-
- -
-
- - -
-
- -
-
String contains 'meters'
-
-
-
- -
-
- -
-
-
-
-
-
-

re.search returns a match object if there is a match, or None if there isn't.

- -
-
-
-
-
-
In [4]:
-
-
-
m = re.search("meters", h)
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[5]:
- - - -
-
<_sre.SRE_Match object; span=(2, 8), match='meters'>
-
- -
- -
-
- -
-
-
-
-
-
-

This matches "meters", but what about "meter"? "meter" is "meters" without an "s". You can specify that a letter is matched 0 or 1 times using "?".

- -
-
-
-
-
-
In [6]:
-
-
-
h = "2 meter"
-
- -
-
-
- -
-
-
-
In [7]:
-
-
-
m = re.search("meters?", h)
-
- -
-
-
- -
-
-
-
In [8]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[8]:
- - - -
-
<_sre.SRE_Match object; span=(2, 7), match='meter'>
-
- -
- -
-
- -
-
-
-
-
-
-

However, this has still not worked, as we match "meters" in the middle of the string. We need to match "meters" only at the end of the string. We do this using "$", which means match at end of string

- -
-
-
-
-
-
In [9]:
-
-
-
m = re.search("meters?$", h)
-
- -
-
-
- -
-
-
-
In [10]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[10]:
- - - -
-
<_sre.SRE_Match object; span=(2, 7), match='meter'>
-
- -
- -
-
- -
-
-
-
-
-
-

We also want to be able to match "m" as well as "meters". To do this, we need to use the "or" operator, which is "|". It is a good idea to put this in round brackets to make both sides of the "or" statement clear.

- -
-
-
-
-
-
In [11]:
-
-
-
h = "2 m"
-
- -
-
-
- -
-
-
-
In [12]:
-
-
-
m = re.search("(m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [13]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[13]:
- - - -
-
<_sre.SRE_Match object; span=(2, 3), match='m'>
-
- -
- -
-
- -
-
-
-
-
-
-

Next, we want to match the number, e.g. "X meters", where "X" is a number. You can use "\d" to represent any single digit. For example:

- -
-
-
-
-
-
In [14]:
-
-
-
h = "2 meters"
-
- -
-
-
- -
-
-
-
In [15]:
-
-
-
m = re.search("\d (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [16]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[16]:
- - - -
-
<_sre.SRE_Match object; span=(0, 8), match='2 meters'>
-
- -
- -
-
- -
-
-
-
-
-
-

A problem with the above example is that it only matches a number with a single digit, as "\d" only matches a single digit. To match one or more digits, we need to put a "+" afterwards, as this means "match one or more", e.g.

- -
-
-
-
-
-
In [17]:
-
-
-
h = "10 meters"
-
- -
-
-
- -
-
-
-
In [18]:
-
-
-
m = re.search("\d+ (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [19]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[19]:
- - - -
-
<_sre.SRE_Match object; span=(0, 9), match='10 meters'>
-
- -
- -
-
- -
-
-
-
-
-
-

This match breaks if the number has a decimal point, as "\d" does not match ".". To match a decimal point, you need to use "\.", followed by "?", which means "match 0 or 1 decimal points", and then "\d*", which means "match 0 or more digits".

- -
-
-
-
-
-
In [20]:
-
-
-
h = "1.5 meters"
-
- -
-
-
- -
-
-
-
In [21]:
-
-
-
m = re.search("\d+\.?\d* (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [22]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[22]:
- - - -
-
<_sre.SRE_Match object; span=(0, 10), match='1.5 meters'>
-
- -
- -
-
- -
-
-
-
-
-
-

The number must match at the beginning of the string. We use "^" to mean match at start...

- -
-
-
-
-
-
In [23]:
-
-
-
h = "some 1.8 meters"
-
- -
-
-
- -
-
-
-
In [24]:
-
-
-
m = re.search("^\d+\.?\d* (m|meters?)$", h)
-
- -
-
-
- -
-
-
-
In [25]:
-
-
-
m
-
- -
-
-
- -
-
-
-
-
-
-

Finally, we want this match to be case insensitive, and would like the user to be free to use as many spaces as they want between the number and the unit, before the string or after the string... To do this we use "\s*" to represent any number of spaces, and match using re.IGNORECASE.

- -
-
-
-
-
-
In [26]:
-
-
-
h = "   1.8 METers   "
-
- -
-
-
- -
-
-
-
In [27]:
-
-
-
m = re.search("^\s*\d+\.?\d*\s*(m|meters?)\s*$", h, re.IGNORECASE)
-
- -
-
-
- -
-
-
-
In [28]:
-
-
-
m
-
- -
-
-
- -
-
- - -
-
Out[28]:
- - - -
-
<_sre.SRE_Match object; span=(0, 16), match='   1.8 METers   '>
-
- -
- -
-
- -
-
-
-
-
-
-

The round brackets do more than just group parts of your search. They also allow you to extract the parts that match.

- -
-
-
-
-
-
In [29]:
-
-
-
m.groups()
-
- -
-
-
- -
-
- - -
-
Out[29]:
- - - -
-
('METers',)
-
- -
- -
-
- -
-
-
-
-
-
-

You can place round brackets around the parts of the match you want to capture. In this case, we want to get the number...

- -
-
-
-
-
-
In [30]:
-
-
-
m = re.search("^\s*(\d+\.?\d*)\s*(m|meters?)\s*$", h, re.IGNORECASE)
-
- -
-
-
- -
-
-
-
In [31]:
-
-
-
m.groups()
-
- -
-
-
- -
-
- - -
-
Out[31]:
- - - -
-
('1.8', 'METers')
-
- -
- -
-
- -
-
-
-
-
-
-

As m.groups()[0] contains the match of the first set of round brackets (which is the number), then we can get the number using m.groups()[0]. This enables us to rewrite the string_to_height function from the last section as;

- -
-
-
-
-
-
In [32]:
-
-
-
def string_to_height(height):
    """Parse the passed string as a height in meters.

    Valid formats are a number followed by 'm', 'meter' or 'meters'
    (in any case), with optional whitespace before the number, between
    the number and the unit, and after the unit, e.g. 'X m', 'X meters'.

    Returns the number as a float.
    Raises TypeError if the string does not have this form.
    """
    # Use a raw string: "\s", "\d" and "\." are regular expression escapes,
    # not Python string escapes. In a normal string literal they are invalid
    # escape sequences, which recent versions of Python warn about.
    m = re.search(r"^\s*(\d+\.?\d*)\s*(m|meters?)\s*$", height, re.IGNORECASE)

    if m:
        # The first set of round brackets captured the number
        return float(m.groups()[0])
    else:
        raise TypeError("Cannot extract a valid height from '%s'" % height)
-
- -
-
-
- -
-
-
-
In [33]:
-
-
-
h = string_to_height("   1.5    meters   ")
-
- -
-
-
- -
-
-
-
In [34]:
-
-
-
h
-
- -
-
-
- -
-
- - -
-
Out[34]:
- - - -
-
1.5
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise

Exercise 1

Rewrite your string_to_weight function using regular expressions. Check that it responds correctly to a range of valid and invalid weights.

- -
-
-
-
-
-
In [35]:
-
-
-
def string_to_weight(weight):
    """Parse the passed string as a weight in kilograms.

    Valid formats are a number followed by 'kg', 'kgs', 'kilo', 'kilos',
    'kilogram' or 'kilograms' (in any case), with optional whitespace
    before the number, between the number and the unit, and after the
    unit, e.g. 'X kg', 'X kilos', 'X kilograms'.

    Returns the number as a float.
    Raises TypeError if the string does not have this form.
    """
    # Use a raw string: "\s", "\d" and "\." are regular expression escapes,
    # not Python string escapes. In a normal string literal they are invalid
    # escape sequences, which recent versions of Python warn about.
    m = re.search(r"^\s*(\d+\.?\d*)\s*(kgs?|kilos?|kilograms?)\s*$", weight, re.IGNORECASE)

    if m:
        # The first set of round brackets captured the number
        return float(m.groups()[0])
    else:
        raise TypeError("Cannot extract a valid weight from '%s'" % weight)
-
- -
-
-
- -
-
-
-
In [36]:
-
-
-
string_to_weight("23.5 kilos"), string_to_weight("5kg"), string_to_weight("10 kilogram")
-
- -
-
-
- -
-
- - -
-
Out[36]:
- - - -
-
(23.5, 5.0, 10.0)
-
- -
- -
-
- -
-
-
-
-
-
-

Exercise 2

Update string_to_height so that it can also understand heights in both meters and centimeters (returning the height in meters), and update string_to_weight so that it can also understand weights in both grams and kilograms (returning the weight in kilograms). Note that you may find it easier to separate the number from the units. You can do this using the below function to divide the string into the number and units. This uses "\w" to match any word character.

- -
-
-
-
-
-
In [37]:
-
-
-
def get_number_and_unit(s):
    """Interpret the passed string 's' as "X unit", where "X" is a number and
       "unit" is a single word giving the unit.

       Whitespace is allowed before the number, between the number and the
       unit, and after the unit.

       Returns the tuple (number, unit), where the number is a float and the
       unit has been converted to lower case.
       Raises TypeError if the string does not have this form.
    """
    # Use a raw string: "\s", "\d", "\." and "\w" are regular expression
    # escapes, not Python string escapes. In a normal string literal they are
    # invalid escape sequences, which recent versions of Python warn about.
    m = re.search(r"^\s*(\d+\.?\d*)\s*(\w+)\s*$", s, re.IGNORECASE)

    if m:
        # First set of round brackets is the number, the second is the unit
        number = float(m.groups()[0])
        unit = m.groups()[1].lower()
        return (number, unit)
    else:
        raise TypeError("Cannot extract a valid 'number unit' from '%s'" % s)
-
- -
-
-
- -
-
-
-
In [38]:
-
-
-
def string_to_height(height):
    """Parse the passed string as a height and return the height in meters.

    Valid formats are 'X m', 'X meters', 'X cm', 'X centimeters' etc.
    (the spellings 'metres' and 'centimetres' are also accepted).
    Heights given in centimeters are converted to meters.

    Raises TypeError if the string is not of the form 'number unit', or
    if the unit is not a recognised unit of height.
    """
    (number, unit) = get_number_and_unit(height)

    # Use re.fullmatch so that the *whole* unit must be one of the accepted
    # spellings. re.search would accept any unit that merely contains the
    # pattern, e.g. "km", "mm" or even "grams" all contain an "m" and would
    # be wrongly interpreted as meters.
    if re.fullmatch("cm|centimet(er|re)s?", unit):
        return number / 100.0

    elif re.fullmatch("m|met(er|re)s?", unit):
        return number
    else:
        raise TypeError("Cannot convert a number with units '%s' to a valid height" % unit)
-
- -
-
-
- -
-
-
-
In [39]:
-
-
-
def string_to_weight(weight):
    """Parse the passed string as a weight and return the weight in kilograms.

    Valid formats are 'X kg', 'X kilos', 'X kilograms', 'X g', 'X grams' etc.
    (the spellings 'kilogrammes' and 'grammes' are also accepted).
    Weights given in grams are converted to kilograms.

    Raises TypeError if the string is not of the form 'number unit', or
    if the unit is not a recognised unit of weight.
    """
    (number, unit) = get_number_and_unit(weight)

    # Use re.fullmatch so that the *whole* unit must be one of the accepted
    # spellings. re.search would accept any unit that merely contains the
    # pattern, e.g. "kilometers" contains "kilo" and "mg" contains "g", so
    # both would be wrongly interpreted as weights in kilograms or grams.
    if re.fullmatch("kgs?|kilos?|kilogram(me)?s?", unit):
        return number

    elif re.fullmatch("g|gram(me)?s?", unit):
        return number / 1000.0
    else:
        raise TypeError("Cannot convert a number with units '%s' to a valid weight" % unit)
-
- -
-
-
- -
-
-
-
In [40]:
-
-
-
string_to_height("55 cm"), string_to_height("2m"), string_to_height("15meters")
-
- -
-
-
- -
-
- - -
-
Out[40]:
- - - -
-
(0.55, 2.0, 15.0)
-
- -
- -
-
- -
-
-
-
In [41]:
-
-
-
string_to_weight("15g"), string_to_weight("5 kilograms"), string_to_weight("5gram")
-
- -
-
-
- -
-
- - -
-
Out[41]:
- - - -
-
(0.015, 5.0, 0.005)
-
- -
- -
-
- -
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/html/answers/images/jupyter_download.jpeg b/html/answers/images/jupyter_download.jpeg deleted file mode 100644 index 4fc6b30..0000000 Binary files a/html/answers/images/jupyter_download.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_files.jpeg b/html/answers/images/jupyter_files.jpeg deleted file mode 100644 index d6b109f..0000000 Binary files a/html/answers/images/jupyter_files.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_hub.jpeg b/html/answers/images/jupyter_hub.jpeg deleted file mode 100644 index e1c2175..0000000 Binary files a/html/answers/images/jupyter_hub.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_new.jpeg b/html/answers/images/jupyter_new.jpeg deleted file mode 100644 index e22c569..0000000 Binary files a/html/answers/images/jupyter_new.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_restart.jpeg b/html/answers/images/jupyter_restart.jpeg deleted file mode 100644 index 478044c..0000000 Binary files a/html/answers/images/jupyter_restart.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_running.jpeg b/html/answers/images/jupyter_running.jpeg deleted file mode 100644 index 0999ad2..0000000 Binary files a/html/answers/images/jupyter_running.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_shell.jpeg b/html/answers/images/jupyter_shell.jpeg deleted file mode 100644 index 419dc58..0000000 Binary files a/html/answers/images/jupyter_shell.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_shutdown.jpeg b/html/answers/images/jupyter_shutdown.jpeg deleted file mode 100644 index 74234d3..0000000 Binary files a/html/answers/images/jupyter_shutdown.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_sleep.jpeg b/html/answers/images/jupyter_sleep.jpeg deleted file mode 100644 index 65ced94..0000000 Binary files a/html/answers/images/jupyter_sleep.jpeg and /dev/null differ diff --git 
a/html/answers/images/jupyter_state.jpeg b/html/answers/images/jupyter_state.jpeg deleted file mode 100644 index 8af1bac..0000000 Binary files a/html/answers/images/jupyter_state.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_state2.jpeg b/html/answers/images/jupyter_state2.jpeg deleted file mode 100644 index dcd37fb..0000000 Binary files a/html/answers/images/jupyter_state2.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_stop.jpeg b/html/answers/images/jupyter_stop.jpeg deleted file mode 100644 index baf8a30..0000000 Binary files a/html/answers/images/jupyter_stop.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_type_code.jpeg b/html/answers/images/jupyter_type_code.jpeg deleted file mode 100644 index 4c9dc10..0000000 Binary files a/html/answers/images/jupyter_type_code.jpeg and /dev/null differ diff --git a/html/answers/images/jupyter_type_markdown.jpeg b/html/answers/images/jupyter_type_markdown.jpeg deleted file mode 100644 index 14333e0..0000000 Binary files a/html/answers/images/jupyter_type_markdown.jpeg and /dev/null differ diff --git a/html/answers/images/view_ligand.jpeg b/html/answers/images/view_ligand.jpeg deleted file mode 100644 index b7c5da8..0000000 Binary files a/html/answers/images/view_ligand.jpeg and /dev/null differ diff --git a/html/answers/images/view_move.jpeg b/html/answers/images/view_move.jpeg deleted file mode 100644 index 3d7c18f..0000000 Binary files a/html/answers/images/view_move.jpeg and /dev/null differ diff --git a/html/answers/images/view_opacity.jpeg b/html/answers/images/view_opacity.jpeg deleted file mode 100644 index d2b8ab0..0000000 Binary files a/html/answers/images/view_opacity.jpeg and /dev/null differ diff --git a/html/answers/images/view_protein.jpeg b/html/answers/images/view_protein.jpeg deleted file mode 100644 index aadd53b..0000000 Binary files a/html/answers/images/view_protein.jpeg and /dev/null differ diff --git 
a/html/answers/images/view_representation.jpeg b/html/answers/images/view_representation.jpeg deleted file mode 100644 index 8b0a4e2..0000000 Binary files a/html/answers/images/view_representation.jpeg and /dev/null differ diff --git a/html/images/jupyter_download.jpeg b/html/images/jupyter_download.jpeg deleted file mode 100644 index 4fc6b30..0000000 Binary files a/html/images/jupyter_download.jpeg and /dev/null differ diff --git a/html/images/jupyter_files.jpeg b/html/images/jupyter_files.jpeg deleted file mode 100644 index d6b109f..0000000 Binary files a/html/images/jupyter_files.jpeg and /dev/null differ diff --git a/html/images/jupyter_hub.jpeg b/html/images/jupyter_hub.jpeg deleted file mode 100644 index e1c2175..0000000 Binary files a/html/images/jupyter_hub.jpeg and /dev/null differ diff --git a/html/images/jupyter_new.jpeg b/html/images/jupyter_new.jpeg deleted file mode 100644 index e22c569..0000000 Binary files a/html/images/jupyter_new.jpeg and /dev/null differ diff --git a/html/images/jupyter_restart.jpeg b/html/images/jupyter_restart.jpeg deleted file mode 100644 index 478044c..0000000 Binary files a/html/images/jupyter_restart.jpeg and /dev/null differ diff --git a/html/images/jupyter_running.jpeg b/html/images/jupyter_running.jpeg deleted file mode 100644 index 0999ad2..0000000 Binary files a/html/images/jupyter_running.jpeg and /dev/null differ diff --git a/html/images/jupyter_shell.jpeg b/html/images/jupyter_shell.jpeg deleted file mode 100644 index 419dc58..0000000 Binary files a/html/images/jupyter_shell.jpeg and /dev/null differ diff --git a/html/images/jupyter_shutdown.jpeg b/html/images/jupyter_shutdown.jpeg deleted file mode 100644 index 74234d3..0000000 Binary files a/html/images/jupyter_shutdown.jpeg and /dev/null differ diff --git a/html/images/jupyter_sleep.jpeg b/html/images/jupyter_sleep.jpeg deleted file mode 100644 index 65ced94..0000000 Binary files a/html/images/jupyter_sleep.jpeg and /dev/null differ diff --git 
a/html/images/jupyter_state.jpeg b/html/images/jupyter_state.jpeg deleted file mode 100644 index 8af1bac..0000000 Binary files a/html/images/jupyter_state.jpeg and /dev/null differ diff --git a/html/images/jupyter_state2.jpeg b/html/images/jupyter_state2.jpeg deleted file mode 100644 index dcd37fb..0000000 Binary files a/html/images/jupyter_state2.jpeg and /dev/null differ diff --git a/html/images/jupyter_stop.jpeg b/html/images/jupyter_stop.jpeg deleted file mode 100644 index baf8a30..0000000 Binary files a/html/images/jupyter_stop.jpeg and /dev/null differ diff --git a/html/images/jupyter_type_code.jpeg b/html/images/jupyter_type_code.jpeg deleted file mode 100644 index 4c9dc10..0000000 Binary files a/html/images/jupyter_type_code.jpeg and /dev/null differ diff --git a/html/images/jupyter_type_markdown.jpeg b/html/images/jupyter_type_markdown.jpeg deleted file mode 100644 index 14333e0..0000000 Binary files a/html/images/jupyter_type_markdown.jpeg and /dev/null differ diff --git a/html/images/view_ligand.jpeg b/html/images/view_ligand.jpeg deleted file mode 100644 index b7c5da8..0000000 Binary files a/html/images/view_ligand.jpeg and /dev/null differ diff --git a/html/images/view_move.jpeg b/html/images/view_move.jpeg deleted file mode 100644 index 3d7c18f..0000000 Binary files a/html/images/view_move.jpeg and /dev/null differ diff --git a/html/images/view_opacity.jpeg b/html/images/view_opacity.jpeg deleted file mode 100644 index d2b8ab0..0000000 Binary files a/html/images/view_opacity.jpeg and /dev/null differ diff --git a/html/images/view_protein.jpeg b/html/images/view_protein.jpeg deleted file mode 100644 index aadd53b..0000000 Binary files a/html/images/view_protein.jpeg and /dev/null differ diff --git a/html/images/view_representation.jpeg b/html/images/view_representation.jpeg deleted file mode 100644 index 8b0a4e2..0000000 Binary files a/html/images/view_representation.jpeg and /dev/null differ