diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b9f4d1e..2a76da4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,7 +29,9 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - - run: pip install '.[devel]' + with: + python-version: '3.9.4' + - run: pip install -e '.[devel]' - run: pre-commit install - run: pre-commit run --all-files run-tests: @@ -39,6 +41,6 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: - python-version: '3.8' + python-version: '3.9.4' - run: pip install '.[devel]' - run: pytest tests diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml new file mode 100644 index 0000000..52ac507 --- /dev/null +++ b/docker-compose.dev.yaml @@ -0,0 +1,37 @@ +--- +version: '3' + +services: + # Elasticsearch node required as a database for Apache Kibble + elasticsearch: + image: elasticsearch:7.13.1 + ports: + - 9200:9200 + - 9300:9300 + environment: + node.name: es01 + discovery.seed_hosts: es02 + cluster.initial_master_nodes: es01 + cluster.name: kibble + ES_JAVA_OPTS: -Xms256m -Xmx256m + ulimits: + memlock: + soft: -1 + hard: -1 + volumes: + - "kibble-es-data:/usr/share/elasticsearch/data" + + # Kibana to view and manage Elasticsearch + kibana: + image: kibana:7.13.1 + ports: + - 5601:5601 + depends_on: + - elasticsearch + environment: + ELASTICSEARCH_URL: http://elasticsearch:9200 + ELASTICSEARCH_HOSTS: http://elasticsearch:9200 + +volumes: + # named volumes can be managed easier using docker-compose + kibble-es-data: diff --git a/docs/architecture.rst b/docs/architecture.rst new file mode 100644 index 0000000..daec7a2 --- /dev/null +++ b/docs/architecture.rst @@ -0,0 +1,126 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Apache Kibble Overview +====================== + +Kibble configuration +-------------------- + +Currently Apache Kibble is configured using `kibble.yaml` configuration file. + +Database configuration +...................... + +.. code-block:: + + elasticsearch: + hosts: + - http://localhost:9200 + +Data sources configuration +.......................... + +Multiple data sources can be configured. Each data source is defined by a python class. In addition, users +have to pass ``name`` and ``config`` which is a configuration specific for a given data source. + +.. code-block:: + + data_sources: + - name: name + class: path.to.a.Class + config: + # Data source specific configuration + +Data source +----------- + +Data source represents an external source of information (for example Github, JIRA, mailing list etc). Each data source +is a python package. In this way users can easily build their own data sources and use them with Kibble. + +Data source package has to have the following structure: + +.. code-block:: + + data_source_name/ + | __init__.py + | ... + | data_types + | | __init__.py + | | type1.py + | | type2.py + | | ... + +The ``data_source_name.__init__`` should include the class defining the data source but the class can be placed in another +file in the top-level directory of the package. + +Data types +.......... + +Data type represents a single type of data within a data source.
For example if Github is a data source then issues and +comments will be two different data types. A data type is a class that has to implement the ``fetch_data`` method that is +used to fetch and persist data. + +Data types are automatically determined using data source class path. + +Each data type is an index in Kibble elasticsearch instance. The data should be stored "as is" so users can leverage existing +documentation. + +Next to persisting data, a data type should also define metrics that can be calculated on the retrieved data. + +Configuring a data source +......................... + +As described previously data sources can be configured in ``kibble.yaml`` config file. For example: + +.. code-block:: + + data_sources: + - name: kibble_github + class: kibble.data_sources.github.GithubDataSource + config: + repo_owner: apache + repo_name: kibble + enabled_data_types: + - issues + - discussions + + - name: pulsar_github + class: kibble.data_sources.github.GithubDataSource + config: + repo_owner: apache + repo_name: pulsar + enabled_data_types: + - issues + - comments + + - name: pulsar_dev_list + class: kibble.data_sources.pony.PonyDataSource + config: + list_name: dev@pulsar.apache.org + enabled_data_types: + - threads + +In the above example we can see that: + +* We configured two different data sources based on ``GithubDataSource``: apache/pulsar and apache/kibble Github repositories. + For both sources we fetch different information. For Kibble we fetch issues and discussions data while for Apache + Pulsar we fetch issues and comments data. +* There's also a third data source using ``PonyDataSource`` configured for Apache Pulsar dev list. + +Thanks to this design users will gain more granularity to configure the data they want to fetch. This also creates a big +opportunity for configuring different authorization options for each data source in future.
diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..106592b --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,16 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..375662d --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Installation steps +================== + +To install Apache Kibble run: + +.. 
code-block:: + + pip install -e ".[devel]" + +You will also need an Elasticsearch instance up and running. You can set up one using docker-compose: + +.. code-block:: + + docker-compose -f docker-compose.dev.yaml up + +Once ES is running you can scan configured data sources: + +.. code-block:: + kibble scanners run -s github_kibble diff --git a/kibble/cli/commands/scanners_command.py b/kibble/cli/commands/scanners_command.py index 2499512..77c25f7 100644 --- a/kibble/cli/commands/scanners_command.py +++ b/kibble/cli/commands/scanners_command.py @@ -19,6 +19,9 @@ import click +from kibble.configuration.yaml_config import kconfig +from kibble.data_sources.base.base_data_source import DataSourceConfig + @click.group(name="scanners") def scanners_group(): @@ -26,21 +29,19 @@ def scanners_group(): @scanners_group.command() -def add(): - """Add new scanner configuration""" - click.echo("To be implemented!") - - -@scanners_group.command(name="list") -def list_scanners(): - """List all available scanners""" - scanners_list = ["AbcScanner", "XyzeScanner"] - for scanner in scanners_list: - click.echo(f"- {scanner}") - - -@scanners_group.command() -@click.argument("scanner_name") -def run(scanner_name: str): - """Trigger a scanning process for given scanner""" - click.echo(f"Running {scanner_name}") +@click.option("-s", "--data-source", "data_source_name", required=True) +def run(data_source_name: str): + """Trigger a scanning process for given data source""" + data_source_config = None + for ds_in_config in kconfig.get("data_sources", []): + if ds_in_config["name"] == data_source_name: + data_source_config = DataSourceConfig.from_dict(ds_in_config) + break + + if not data_source_config: + click.echo(f"Data source {data_source_name} not configured") + return + + data_source = data_source_config.get_object() + click.echo(f"Scanning {data_source_name}") + data_source.scan() diff --git a/kibble/scanners/__init__.py b/kibble/configuration/__init__.py similarity index 100% rename
from kibble/scanners/__init__.py rename to kibble/configuration/__init__.py diff --git a/kibble/configuration/yaml_config.py b/kibble/configuration/yaml_config.py new file mode 100644 index 0000000..0887b5a --- /dev/null +++ b/kibble/configuration/yaml_config.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pathlib import Path +from typing import Dict + +import yaml + +KIBBLE_YAML = "kibble.yaml" + + +def parse_kibble_yaml() -> Dict: + """Reads kibble.yaml config file""" + config_path = Path(__file__).parent.parent.joinpath(KIBBLE_YAML) + with open(config_path, "r") as stream: + config = yaml.safe_load(stream) + return config + + +kconfig = parse_kibble_yaml() + +if __name__ == "__main__": + parse_kibble_yaml() diff --git a/kibble/data_sources/__init__.py b/kibble/data_sources/__init__.py new file mode 100644 index 0000000..13a8339 --- /dev/null +++ b/kibble/data_sources/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/kibble/data_sources/base/__init__.py b/kibble/data_sources/base/__init__.py new file mode 100644 index 0000000..13a8339 --- /dev/null +++ b/kibble/data_sources/base/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/kibble/data_sources/base/base_data_source.py b/kibble/data_sources/base/base_data_source.py new file mode 100644 index 0000000..fbc130d --- /dev/null +++ b/kibble/data_sources/base/base_data_source.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import inspect +from functools import cached_property +from pathlib import Path +from typing import Any, Dict, List, NamedTuple, Optional + +from kibble.data_sources.base.base_data_type import BaseDataType +from kibble.data_sources.base.module_loading import import_string + + +class BaseDataSource: + """Base class for all data sources""" + + _data_types_folder = "data_types" + _excluded_files = {"base"} + + def __init__(self, *, enabled_data_types: Optional[List[str]] = None): + self.enabled_data_types = enabled_data_types + + def _get_data_type_classes(self) -> Dict[str, str]: + data_source_path = Path(inspect.getfile(self.__class__)) + data_types_dir = data_source_path.parent.joinpath(self._data_types_folder) + data_type_classes = {} + + for file in data_types_dir.iterdir(): + if file.stem in self._excluded_files or file.stem.startswith("_"): + continue + data_type_classes[file.stem] = f"{self.__module__}.{self._data_types_folder}.{file.stem}.DataType" + return data_type_classes + + @cached_property + def data_types_classes(self) -> Dict[str, str]: + """Returns data types defined in this data source""" + return self._get_data_type_classes() + + def scan(self): + """Collect data for configured data types""" + unscanned = [] + if not self.enabled_data_types: + print("No data types 
enabled") + return + + for data_type_name in self.enabled_data_types: + klass = self.data_types_classes.get(data_type_name) + if not klass: + unscanned.append(data_type_name) + continue + data_type_class = import_string(klass) + data_type: BaseDataType = data_type_class(data_source=self) + data_type.fetch_data() + + if unscanned: + print(f"Found no data types for following configurations {unscanned}") + + +class DataSourceConfig(NamedTuple): + """Data source configuration""" + + name: str + klass: str + config: Dict[str, Any] + + @classmethod + def from_dict(cls, dictionary: Dict): + """Make DataSourceConfig from a dictionary""" + return cls( + name=dictionary["name"], + klass=dictionary["class"], + config=dictionary["config"], + ) + + def get_object(self) -> BaseDataSource: + """Return data source object defined by this config""" + ds_class = import_string(self.klass) + return ds_class(**self.config) diff --git a/kibble/data_sources/base/base_data_type.py b/kibble/data_sources/base/base_data_type.py new file mode 100644 index 0000000..9446314 --- /dev/null +++ b/kibble/data_sources/base/base_data_type.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import logging +from typing import Any, Dict, List, Optional + +from elasticsearch import RequestError + +from kibble.database.connection import es +from kibble.exceptions import KibbleException + + +class BaseDataType: + """Abstract, base class for all data types""" + + _index: Optional[str] + + def __init__(self, **kwargs): + self.log = logging.getLogger(__name__) + + def fetch_data(self): # pylint: disable=no-self-use + """Fetch data from data source""" + raise NotImplementedError() + + def persist(self, payload: List[Any], doc_type: str, id_mapper): + """ + Persists the payload in data type index + + :param payload: List of documents to be persisted in ES + :param doc_type: Name of the document to be used + :param id_mapper: Function that takes a single document and retrieves its id that will + be used as document ID in ES. + """ + if not self._index: + raise KibbleException(f"Data type {self.__class__.__name__} has no index defined") + + if not id_mapper: + raise KibbleException("id_mapper has to be specified to created id for document") + + try: + es.indices.create(index=self._index) + except RequestError as err: + if err.error != "resource_already_exists_exception": + raise + + for document in payload: + es.index(index=self._index, doc_type=doc_type, body=document, id=id_mapper(document)) + + def read(self, query: Optional[Dict[str, Any]] = None): + """Read data from data type index""" + query = query or {"match_all": {}} + return es.search(index=self._index, body={"query": query}) diff --git a/kibble/data_sources/base/module_loading.py b/kibble/data_sources/base/module_loading.py new file mode 100644 index 0000000..97763f0 --- /dev/null +++ b/kibble/data_sources/base/module_loading.py @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from importlib import import_module + + +def import_string(dotted_path: str): + """ + Import a dotted module path and return the attribute/class designated by the + last name in the path. Raise ImportError if the import failed. + """ + try: + module_path, class_name = dotted_path.rsplit(".", 1) + except ValueError: + # pylint: disable =raise-missing-from + raise ImportError(f"{dotted_path} doesn't look like a module path") + + module = import_module(module_path) + + try: + return getattr(module, class_name) + except AttributeError: + # pylint: disable =raise-missing-from + raise ImportError(f'Module "{module_path}" does not define a "{class_name}" attribute/class') diff --git a/kibble/data_sources/github/__init__.py b/kibble/data_sources/github/__init__.py new file mode 100644 index 0000000..063d070 --- /dev/null +++ b/kibble/data_sources/github/__init__.py @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional + +from kibble.data_sources.base.base_data_source import BaseDataSource +from kibble.exceptions import KibbleException +from kibble.secrets.env_variable import get_secret_from_env + + +class GithubDataSource(BaseDataSource): + """Github datasource class""" + + name = "github" + + def __init__(self, *, repo_owner: str, repo_name: str, api_key: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + self.repo_owner = repo_owner + self.repo_name = repo_name + self.api_key = api_key or get_secret_from_env("GH_API_KEY") + if not self.api_key: + raise KibbleException("No Github API_KEY") diff --git a/kibble/data_sources/github/data_types/__init__.py b/kibble/data_sources/github/data_types/__init__.py new file mode 100644 index 0000000..13a8339 --- /dev/null +++ b/kibble/data_sources/github/data_types/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/kibble/data_sources/github/data_types/base.py b/kibble/data_sources/github/data_types/base.py new file mode 100644 index 0000000..d7cb0a3 --- /dev/null +++ b/kibble/data_sources/github/data_types/base.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Dict, List, Optional, Union +from urllib.parse import urlencode, urljoin + +import requests + +from kibble.data_sources.base.base_data_type import BaseDataType +from kibble.data_sources.github import GithubDataSource + + +# pylint: disable=abstract-method +class GithubBaseDataType(BaseDataType): + """Base data type class for Github""" + + _index = "github" + + def __init__(self, *, data_source: GithubDataSource, **kwargs): + super().__init__(**kwargs) + + self.repo_owner = data_source.repo_owner + self.repo_name = data_source.repo_name + self.repo_full_name = f"{self.repo_owner}/{self.repo_name}" + + self.base_url = "https://api.github.com" + self.headers = { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"token {data_source.api_key}", + } + + def _send_request(self, endpoint: str, query: Optional[Dict] = None) -> Union[List, Dict]: + url = urljoin(self.base_url, endpoint) + url = f"{url}?{urlencode(query)}" if query else url + response = requests.get(url, headers=self.headers) + response.raise_for_status() + return response.json() diff --git a/kibble/data_sources/github/data_types/issues.py b/kibble/data_sources/github/data_types/issues.py new file mode 100644 index 0000000..8e2ddbc --- /dev/null +++ b/kibble/data_sources/github/data_types/issues.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict, List + +from kibble.data_sources.github.data_types.base import GithubBaseDataType + + +class DataType(GithubBaseDataType): + """Github issues and pull requests""" + + _doc_type = "issue" + + def fetch_data(self): + endpoint = f"/repos/{self.repo_owner}/{self.repo_name}/issues" + query = {"per_page": 100, "page": 1} + + issues: List[Dict] = [] + self.log.info("Collecting Github issues and PRs from %s", self.repo_full_name) + while new_issues := self._send_request(endpoint, query): + for issue_pr in new_issues: + issues.append(issue_pr) + query["page"] += 1 + + self.persist(issues, doc_type=self._doc_type, id_mapper=lambda r: r["id"]) diff --git a/kibble/database/connection.py b/kibble/database/connection.py new file mode 100644 index 0000000..099b763 --- /dev/null +++ b/kibble/database/connection.py @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch import Elasticsearch + +from kibble.configuration.yaml_config import kconfig + + +def create_es() -> Elasticsearch: + """Creates ES instance connected to Kibble database""" + es_hosts = kconfig["elasticsearch"]["hosts"] + return Elasticsearch(es_hosts) + + +es = create_es() diff --git a/kibble/exceptions.py b/kibble/exceptions.py new file mode 100644 index 0000000..09f5756 --- /dev/null +++ b/kibble/exceptions.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +class KibbleException(Exception): + """Generic Kibble exception""" + + +class SecretNotFound(Exception): + """Exception raised when secret value could not be found.""" + + def __init__(self, secret: str, secret_type: str): + self.message = f"Secret {secret} could not be found in {secret_type}" + super().__init__(self.message) diff --git a/kibble/kibble.yaml b/kibble/kibble.yaml new file mode 100644 index 0000000..f318d03 --- /dev/null +++ b/kibble/kibble.yaml @@ -0,0 +1,12 @@ +--- +elasticsearch: + hosts: + - http://localhost:9200 +data_sources: + - name: github_kibble + class: kibble.data_sources.github.GithubDataSource + config: + repo_owner: apache + repo_name: kibble + enabled_data_types: + - issues diff --git a/kibble/secrets/__init__.py b/kibble/secrets/__init__.py new file mode 100644 index 0000000..13a8339 --- /dev/null +++ b/kibble/secrets/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/kibble/secrets/env_variable.py b/kibble/secrets/env_variable.py new file mode 100644 index 0000000..c3c36b1 --- /dev/null +++ b/kibble/secrets/env_variable.py @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os + +from kibble.exceptions import SecretNotFound + + +def get_secret_from_env(key: str): + """Return env variable KIBBLE_SECRET_<key upper-cased>; raise SecretNotFound if it is unset or empty.""" + env_key = f"KIBBLE_SECRET_{key.upper()}" + secret = os.environ.get(env_key) + if not secret: + raise SecretNotFound(secret=env_key, secret_type="environment variables") + return secret diff --git a/pylintrc b/pylintrc index 5e6d939..9f98da3 100644 --- a/pylintrc +++ b/pylintrc @@ -459,7 +459,7 @@ name-group= # Regular expression which should only match function or class names that do # not require a docstring. -no-docstring-rgx=^_ +no-docstring-rgx=^_|^scan$ # List of decorators that produce properties, such as abc.abstractproperty. Add # to this list to register other decorators that produce valid properties. @@ -578,7 +578,7 @@ max-returns=6 max-statements=50 # Minimum number of public methods for a class (see R0903). -min-public-methods=2 +min-public-methods=1 [EXCEPTIONS] diff --git a/setup.py b/setup.py index a165584..02c696c 100644 --- a/setup.py +++ b/setup.py @@ -16,21 +16,22 @@ # under the License.
import os +from pathlib import Path from setuptools import find_packages, setup VERSION = "2.0.0dev" -BASE_PATH = os.path.dirname(os.path.realpath(__file__)) +BASE_PATH = Path(__file__).parent DEVEL_REQUIREMENTS = [ "black==20.8b1", "pre-commit==2.7.1", - "pylint==2.6.2", + "pylint>=2.7.4", "pytest==6.1.1", ] -INSTALL_REQUIREMENTS = ["requests>=2.25.1"] +INSTALL_REQUIREMENTS = ["elasticsearch==7.13.1", "requests>=2.25.1", "click>=8.0.1", "PyYAML>=5.4.1"] EXTRAS_REQUIREMENTS = {"devel": DEVEL_REQUIREMENTS} diff --git a/tests/cli/commands/test_scanners_command.py b/tests/cli/commands/test_scanners_command.py index 2b62f9e..c2681b9 100644 --- a/tests/cli/commands/test_scanners_command.py +++ b/tests/cli/commands/test_scanners_command.py @@ -14,10 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from unittest import mock from click.testing import CliRunner from kibble.cli.commands.scanners_command import scanners_group +from kibble.configuration.yaml_config import kconfig class TestScannerCommand: @@ -28,16 +30,46 @@ def test_add(self): assert result.exit_code == 0 assert result.output.strip() == "To be implemented!" 
- def test_list(self): + @mock.patch("kibble.cli.commands.scanners_command.get_scanners_classes") + def test_list(self, mock_get_scanners_classes): + class MockScanner: + pass + + mock_get_scanners_classes.return_value = [MockScanner] runner = CliRunner() result = runner.invoke(scanners_group, ["list"]) assert result.exit_code == 0 - assert result.output.strip() == "- AbcScanner\n- XyzeScanner" + assert result.output.strip() == "MockScanner" - def test_run(self): - runner = CliRunner() - result = runner.invoke(scanners_group, ["run", "TestScanner"]) + @mock.patch.dict( + kconfig, + { + "data_sources": [ + { + "name": "github", + "organizations": [{"repo_owner": "apache", "repo_name": "kibble"}], + "enabled": ["mock_scanner"], + } + ] + }, + ) + @mock.patch("kibble.cli.commands.scanners_command.get_scanner") + def test_run(self, mock_get_scanner): + class MockScanner: + scanner_name = "mock_scanner" + + def __init__(self, **kwargs): + pass + def scan(self): + pass + + mock_get_scanner.return_value = MockScanner + + runner = CliRunner() + result = runner.invoke(scanners_group, ["run", "-s", "mock_scanner"]) assert result.exit_code == 0 - assert result.output.strip() == "Running TestScanner" + assert ( + result.output.strip() == "Running MockScanner for {'repo_owner': 'apache', 'repo_name': 'kibble'}" + )