diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 000000000..688134b42 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,13 @@ +name: Check spelling with codespell + +on: [push, pull_request] + +jobs: + codespell: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + # codespell version should be kept in sync with .pre-commit-config.yaml + - run: pip install --user codespell==2.4.1 tomli + - run: codespell + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05f5d3df0..b12b351c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,3 +13,11 @@ repos: hooks: - id: ruff id: ruff-format + + - repo: https://github.com/codespell-project/codespell + # version should be kept in sync with .github/workflows/codespell.yml & micropython repo + rev: v2.4.1 + hooks: + - id: codespell + additional_dependencies: + - tomli diff --git a/micropython/bluetooth/aioble/aioble/central.py b/micropython/bluetooth/aioble/aioble/central.py index 131b1e0db..15ef8aa12 100644 --- a/micropython/bluetooth/aioble/aioble/central.py +++ b/micropython/bluetooth/aioble/aioble/central.py @@ -119,7 +119,7 @@ async def _connect( _connecting.add(device) # Event will be set in the connected IRQ, and then later - # re-used to notify disconnection. + # reused to notify disconnection. connection._event = connection._event or asyncio.ThreadSafeFlag() try: diff --git a/micropython/bluetooth/aioble/aioble/client.py b/micropython/bluetooth/aioble/aioble/client.py index 859c6e937..a3b0efb6e 100644 --- a/micropython/bluetooth/aioble/aioble/client.py +++ b/micropython/bluetooth/aioble/aioble/client.py @@ -244,7 +244,7 @@ async def read(self, timeout_ms=1000): self._register_with_connection() # This will be set by the done IRQ. self._read_status = None - # This will be set by the result and done IRQs. Re-use if possible. + # This will be set by the result and done IRQs. Reuse if possible. 
self._read_event = self._read_event or asyncio.ThreadSafeFlag() # Issue the read. diff --git a/micropython/espflash/example.py b/micropython/espflash/example.py index 76e8eb84e..ea4a8af95 100644 --- a/micropython/espflash/example.py +++ b/micropython/espflash/example.py @@ -13,7 +13,7 @@ esp = espflash.ESPFlash(reset, gpio0, uart) # Enter bootloader download mode, at 115200 esp.bootloader() - # Can now chage to higher/lower baudrate + # Can now change to higher/lower baudrate esp.set_baudrate(921600) # Must call this first before any flash functions. esp.flash_attach() diff --git a/micropython/lora/tests/test_time_on_air.py b/micropython/lora/tests/test_time_on_air.py index 56fa1ad81..7a26e5954 100644 --- a/micropython/lora/tests/test_time_on_air.py +++ b/micropython/lora/tests/test_time_on_air.py @@ -5,7 +5,7 @@ # # ## What is this? # -# Host tests for the BaseModem.get_time_on_air_us() function. Theses against +# Host tests for the BaseModem.get_time_on_air_us() function. These test against # dummy test values produced by the Semtech "SX1261 LoRa Calculator" software, # as downloaded from # https://lora-developers.semtech.com/documentation/product-documents/ diff --git a/micropython/senml/examples/custom_record.py b/micropython/senml/examples/custom_record.py index ece866791..90f1ddfdb 100644 --- a/micropython/senml/examples/custom_record.py +++ b/micropython/senml/examples/custom_record.py @@ -32,7 +32,7 @@ class Coordinates(SenmlRecord): def __init__(self, name, **kwargs): """overriding the init function so we can initiate the 3 senml records that will represent lat,lon, alt""" self._lat = SenmlRecord( - "lattitude", unit=SenmlUnits.SENML_UNIT_DEGREES_LATITUDE + "latitude", unit=SenmlUnits.SENML_UNIT_DEGREES_LATITUDE ) # create these before calling base constructor so that all can be init correctly from constructor self._lon = SenmlRecord("longitude", unit=SenmlUnits.SENML_UNIT_DEGREES_LONGITUDE) self._alt = SenmlRecord("altitude", 
unit=SenmlUnits.SENML_UNIT_METER) diff --git a/micropython/senml/senml/senml_record.py b/micropython/senml/senml/senml_record.py index 9cc260f5b..282f3e5e3 100644 --- a/micropython/senml/senml/senml_record.py +++ b/micropython/senml/senml/senml_record.py @@ -188,7 +188,7 @@ def _build_rec_dict(self, naming_map, appendTo): elif isinstance(self._value, bytearray): if ( naming_map["vd"] == "vd" - ): # neeed to make a distinction between json (needs base64) and cbor (needs binary) + ): # need to make a distinction between json (needs base64) and cbor (needs binary) result[naming_map["vd"]] = binascii.b2a_base64(self._value, newline=False).decode( "utf8" ) @@ -216,7 +216,7 @@ def _build_rec_dict(self, naming_map, appendTo): def _from_raw(self, raw, naming_map): """ - extracts te data from the raw record. Used during parsing of incoming data. + extracts the data from the raw record. Used during parsing of incoming data. :param raw: a raw senml record which still contains the original field names :param naming_map: used to map cbor names to json field names :return: diff --git a/micropython/usb/usb-device/usb/device/core.py b/micropython/usb/usb-device/usb/device/core.py index 926c662bd..20cda94ca 100644 --- a/micropython/usb/usb-device/usb/device/core.py +++ b/micropython/usb/usb-device/usb/device/core.py @@ -611,7 +611,7 @@ def stall(self, ep_addr, *args): # argument to set or clear. # # Generally endpoint STALL is handled automatically, but there are some - # device classes that need to explicitly stall or unstall an endpoint + # device classes that need to explicitly stall or un-stall an endpoint # under certain conditions. 
if not self._open or ep_addr not in self._eps: raise RuntimeError diff --git a/pyproject.toml b/pyproject.toml index a309df1f6..736fd3a62 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,13 @@ +[tool.codespell] +count = "" +ignore-regex = '\b[A-Z]{3}\b' +ignore-multiline-regex = "# codespell:ignore-begin *\n.*# codespell:ignore-end *\n" +ignore-words = "tools/ignore_words.txt" +quiet-level = 3 +skip = """ +./.git,\ +""" + [tool.ruff] extend-exclude = [ "python-stdlib", diff --git a/python-ecosys/requests/requests/__init__.py b/python-ecosys/requests/requests/__init__.py index 4ca7489a4..68b4b18cb 100644 --- a/python-ecosys/requests/requests/__init__.py +++ b/python-ecosys/requests/requests/__init__.py @@ -56,9 +56,9 @@ def request( import binascii username, password = auth - formated = b"{}:{}".format(username, password) - formated = str(binascii.b2a_base64(formated)[:-1], "ascii") - headers["Authorization"] = "Basic {}".format(formated) + formatted = b"{}:{}".format(username, password) + formatted = str(binascii.b2a_base64(formatted)[:-1], "ascii") + headers["Authorization"] = "Basic {}".format(formatted) try: proto, dummy, host, path = url.split("/", 3) diff --git a/python-stdlib/fnmatch/test_fnmatch.py b/python-stdlib/fnmatch/test_fnmatch.py index 97ef8fff7..d7f543336 100644 --- a/python-stdlib/fnmatch/test_fnmatch.py +++ b/python-stdlib/fnmatch/test_fnmatch.py @@ -58,8 +58,8 @@ def test_fnmatchcase(self): @unittest.skip("unsupported on MicroPython") def test_bytes(self): - self.check_match(b"test", b"te*") - self.check_match(b"test\xff", b"te*\xff") + self.check_match(b"test", b"te*") # codespell:ignore + self.check_match(b"test\xff", b"te*\xff") # codespell:ignore self.check_match(b"foo\nbar", b"foo*") diff --git a/python-stdlib/unittest/tests/test_assertions.py b/python-stdlib/unittest/tests/test_assertions.py index b191220e6..3b9904086 100644 --- a/python-stdlib/unittest/tests/test_assertions.py +++ 
b/python-stdlib/unittest/tests/test_assertions.py @@ -92,7 +92,7 @@ def testFalse(self): with self.assertRaises(AssertionError): self.assertFalse(True) - def testIn(self): + def testIn(self): # codespell:ignore self.assertIn("t", "cat") with self.assertRaises(AssertionError): self.assertIn("x", "cat") diff --git a/tools/ignore_words.txt b/tools/ignore_words.txt new file mode 100644 index 000000000..5286cbed5 --- /dev/null +++ b/tools/ignore_words.txt @@ -0,0 +1,12 @@ +# non-words to be ignored by codespell +# shared with micropython/micropython +asend +ure + +# Specific to micropython/micropython-lib +bu +curch +ist +clen +shttp +ody diff --git a/unix-ffi/email.charset/email/charset.py b/unix-ffi/email.charset/email/charset.py index 304cc8fef..9278766c2 100644 --- a/unix-ffi/email.charset/email/charset.py +++ b/unix-ffi/email.charset/email/charset.py @@ -222,11 +222,11 @@ def __init__(self, input_charset=DEFAULT_CHARSET): # We can try to guess which encoding and conversion to use by the # charset_map dictionary. Try that first, but let the user override # it. - henc, benc, conv = CHARSETS.get(self.input_charset, (SHORTEST, BASE64, None)) + henc, benc, conv = CHARSETS.get(self.input_charset, (SHORTEST, BASE64, None)) # codespell:ignore if not conv: conv = self.input_charset # Set the attributes, allowing the arguments to override the default. - self.header_encoding = henc + self.header_encoding = henc # codespell:ignore self.body_encoding = benc self.output_charset = ALIASES.get(conv, conv) # Now set the codecs. If one isn't defined for input_charset, diff --git a/unix-ffi/email.parser/email/parser.py b/unix-ffi/email.parser/email/parser.py index 760adeff5..7053d0734 100644 --- a/unix-ffi/email.parser/email/parser.py +++ b/unix-ffi/email.parser/email/parser.py @@ -23,7 +23,7 @@ def __init__(self, _class=Message, policy=compat32): textual representation of the message. 
The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceeded by a `Unix-from' header. The + continuation lines, optionally preceded by a `Unix-from' header. The header block is terminated either by the end of the string or by a blank line. @@ -85,7 +85,7 @@ def __init__(self, *args, **kw): textual representation of the message. The input must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceeded by a `Unix-from' header. The + continuation lines, optionally preceded by a `Unix-from' header. The header block is terminated either by the end of the input or by a blank line. diff --git a/unix-ffi/html.entities/html/entities.py b/unix-ffi/html.entities/html/entities.py index 223af74aa..b5f8717c8 100644 --- a/unix-ffi/html.entities/html/entities.py +++ b/unix-ffi/html.entities/html/entities.py @@ -1,6 +1,7 @@ """HTML character entity references.""" # maps the HTML entity name to the Unicode codepoint +# codespell:ignore-begin name2codepoint = { "AElig": 0x00C6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 "Aacute": 0x00C1, # latin capital letter A with acute, U+00C1 ISOlat1 @@ -255,9 +256,10 @@ "zwj": 0x200D, # zero width joiner, U+200D NEW RFC 2070 "zwnj": 0x200C, # zero width non-joiner, U+200C NEW RFC 2070 } - +# codespell:ignore-end # maps the HTML5 named character references to the equivalent Unicode character(s) +# codespell:ignore-begin html5 = { "Aacute": "\xc1", "aacute": "\xe1", @@ -2491,6 +2493,7 @@ "zwj;": "\u200d", "zwnj;": "\u200c", } +# codespell:ignore-end # maps the Unicode codepoint to the HTML entity name codepoint2name = {} diff --git a/unix-ffi/html.parser/html/parser.py b/unix-ffi/html.parser/html/parser.py index 74b39d49a..571b42a34 100644 --- a/unix-ffi/html.parser/html/parser.py +++ b/unix-ffi/html.parser/html/parser.py @@ -445,7 +445,7 @@ def parse_endtag(self, i): tagname = namematch.group().lower() # consume and ignore other 
stuff between the name and the > # Note: this is not 100% correct, since we might have things like - # </tag attr=">">, but looking for > after tha name should cover + # </tag attr=">">, but looking for > after the name should cover # most of the cases and is much simpler gtpos = rawdata.find(">", namematch.end()) self.handle_endtag(tagname) diff --git a/unix-ffi/http.client/http/client.py b/unix-ffi/http.client/http/client.py index 856848283..3556f1ca8 100644 --- a/unix-ffi/http.client/http/client.py +++ b/unix-ffi/http.client/http/client.py @@ -1038,10 +1038,10 @@ def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): self.putheader("Accept-Encoding", "identity") # we can accept "chunked" Transfer-Encodings, but no others - # NOTE: no TE header implies *only* "chunked" + # NOTE: no 'TE' header implies *only* "chunked" # self.putheader('TE', 'chunked') - # if TE is supplied in the header, then it must appear in a + # if 'TE' is supplied in the header, then it must appear in a # Connection header. # self.putheader('Connection', 'TE') @@ -1093,7 +1093,7 @@ def _set_content_length(self, body): thelen = None try: thelen = str(len(body)) - except TypeError as te: + except TypeError as te: # codespell:ignore # If this is a file-like object, try to # fstat its file descriptor try: