diff --git a/Config Management/Ansible Auto-Deploy/3node cluster/hosts.ini b/Config Management/Ansible Auto-Deploy/3node cluster/hosts.ini new file mode 100644 index 0000000..d6e07c5 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/3node cluster/hosts.ini @@ -0,0 +1,4 @@ +[tigergraph_servers] +server1 ansible_host=hostname ansible_user=graphsql ansible_ssh_private_key_file=/Users/samuel.skidmore/Desktop/qebot.pem +server2 ansible_host=hostname ansible_user=graphsql ansible_ssh_private_key_file=/Users/samuel.skidmore/Desktop/qebot.pem +server3 ansible_host=hostname ansible_user=graphsql ansible_ssh_private_key_file=/Users/samuel.skidmore/Desktop/qebot.pem diff --git a/Config Management/Ansible Auto-Deploy/3node cluster/install_conf.json b/Config Management/Ansible Auto-Deploy/3node cluster/install_conf.json new file mode 100644 index 0000000..c55c04a --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/3node cluster/install_conf.json @@ -0,0 +1,41 @@ +{ + "BasicConfig": { + "TigerGraph": { + "Username": "tigergraph", + "[comment]":"Provide password for tigergraph user, if the user already exists, we won't change the password. If the password is empty, we will set it to default value 'tigergraph'.", + "[comment]": "TigerGraph does not support passwords with special characters such as '$'. Please change your password if it contains such special characters.", + "Password": "tigergraph", + "SSHPort": 22, + "[comment]":"(Optional)Provide valid private key file below to replace tigergraph.rsa and tigergraph.pub, which will be generated by default.", + "PrivateKeyFile": "", + "PublicKeyFile": "" + }, + "RootDir": { + "AppRoot": "/home/tigergraph/tigergraph/app", + "DataRoot": "/home/tigergraph/tigergraph/data", + "LogRoot": "/home/tigergraph/tigergraph/log", + "TempRoot": "/home/tigergraph/tigergraph/tmp" + }, + "License": "", + "[comment]":"You can add more nodes by string 'node_id: IP', appending to the following json array. Otherwise, it installs single node locally by default.", + "NodeList": [ + "m1: hostname", "m2: hostname", "m3: hostname" + ] + }, + "AdvancedConfig": { + "[comment]": "Keep the default ClusterConfig if installing locally", + "ClusterConfig": { + "[comment]": "All nodes must have the same login configurations", + "LoginConfig": { + "SudoUser": "graphsql", + "[comment]": "choose login method: 'P' for SSH using password or 'K' for SSH using key file (e.g. ec2_key.pem)", + "[comment]": "TigerGraph does not support passwords with special characters such as '$'. 
Please change your password if it contains such special characters.", + "Method": "K", + "P": "sudoUserPassword", + "K": "/home/graphsql/qebot.pem" + }, + "[comment]": "To install a high-availability cluster, please specify the ReplicationFactor greater than 1", + "ReplicationFactor": 3 + } + } + } diff --git a/Config Management/Ansible Auto-Deploy/3node cluster/install_tigergraph_3cluster.yml b/Config Management/Ansible Auto-Deploy/3node cluster/install_tigergraph_3cluster.yml new file mode 100644 index 0000000..1111d55 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/3node cluster/install_tigergraph_3cluster.yml @@ -0,0 +1,111 @@ +- hosts: tigergraph_servers + become: yes + tasks: + - name: Update and upgrade apt packages + become: yes + apt: + update_cache: yes + upgrade: yes + when: ansible_os_family == "Debian" + + - name: Install prerequisites for CentOS/RedHat + yum: + name: + - tar + - curl + - cronie + - iproute + - util-linux-ng + - net-tools + - nc + - coreutils + - openssh-clients + - openssh-server + - sshpass + state: present + when: ansible_os_family == "RedHat" + + - name: Install prerequisites for Ubuntu/Debian + apt: + name: + - tar + - curl + - cron + - iproute2 + - util-linux + - net-tools + - netcat + - coreutils + - openssh-client + - openssh-server + - sshpass + state: present + when: ansible_os_family == "Debian" + + - name: Install basic applications + apt: + name: "{{ item }}" + state: present + loop: + - vim + - htop + - nano + - git + when: ansible_os_family == "Debian" + + - name: Install basic applications + yum: + name: "{{ item }}" + state: present + loop: + - vim-enhanced + - htop + - nano + - git + when: ansible_os_family == "RedHat" + + - name: Download TigerGraph installer + get_url: + url: "https://tigergraph-release-download.s3.us-west-1.amazonaws.com/enterprise-edition/prebuild/tigergraph-4.1.0-offline-07180247.tar.gz?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAWNM34YTI5OWLYSEX%2F20240816%2Fus-west-1%2Fs3%2Faws4_request&X-Amz-Date=20240816T173132Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=9c7aaeeafd938e9401d6a2e0214dd1edea694c199a007ac3a5d91bba89c48d28" + dest: /home/graphsql/tigergraphansible.tar.gz + mode: '0440' + when: inventory_hostname == groups['tigergraph_servers'][0] + + - name: Extract TigerGraph installer + unarchive: + src: /home/graphsql/tigergraphansible.tar.gz + dest: /home/graphsql/ + remote_src: yes + when: inventory_hostname == groups['tigergraph_servers'][0] + + - name: Find TigerGraph installation directory + find: + paths: "/home/graphsql" + file_type: directory + patterns: "tigergraph-*" + register: tg_dir + when: inventory_hostname == groups['tigergraph_servers'][0] + + - name: Copy custom install_conf.json to TigerGraph installation directory + copy: + src: ./install_conf.json + dest: "{{ tg_dir.files[0].path }}/install_conf.json" + when: inventory_hostname == groups['tigergraph_servers'][0] + + + - name: Run TigerGraph installer non-interactively + shell: | + cd {{ tg_dir.files[0].path }} + sudo ./install.sh -n + args: + creates: "{{ tg_dir.files[0].path }}/tgdb/conf/tgdb.conf" + when: ansible_play_hosts[0] == inventory_hostname + + - name: Cleanup installation files + file: + path: "{{ item }}" + state: absent + loop: + - /home/graphsql/tigergraphansible.tar.gz + - "{{ tg_dir.files[0].path }}" + when: ansible_play_hosts[0] == inventory_hostname diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/data_files/test.csv b/Config Management/Ansible Auto-Deploy/Ansible 
GSQL and Dataloading/data_files/test.csv new file mode 100644 index 0000000..e69de29 diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/execute_gsql.yml b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/execute_gsql.yml new file mode 100644 index 0000000..fa421ea --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/execute_gsql.yml @@ -0,0 +1,16 @@ +--- +- name: Transfer GSQL file to server + copy: + src: "{{ gsql_file }}" + dest: "/tmp/{{ gsql_file | basename }}" + +- name: Run GSQL script + become: yes + become_user: tigergraph + shell: | + gsql /tmp/{{ gsql_file | basename }} + register: gsql_output + +- name: Output GSQL script execution result + debug: + msg: "{{ gsql_output.stdout }}" diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/gsql_scripts/test.gsql b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/gsql_scripts/test.gsql new file mode 100644 index 0000000..e69de29 diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/hosts.ini b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/hosts.ini new file mode 100644 index 0000000..56dc622 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/hosts.ini @@ -0,0 +1,2 @@ +[tigergraph_servers] +server1 ansible_host=hostname ansible_user=graphsql ansible_ssh_private_key_file=/Users/samuel.skidmore/Desktop/qebot.pem diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/load_data.yml b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/load_data.yml new file mode 100644 index 0000000..63f2bc4 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/load_data.yml @@ -0,0 +1,21 @@ +--- +- name: Transfer data file to server + copy: + src: "{{ csv_file }}" + dest: "/home/tigergraph/tmp/{{ csv_file | basename }}" + +- name: Load data into TigerGraph + become: yes + become_user: tigergraph + shell: | + gsql -g social_net 'RUN LOADING JOB load_job_name USING file_path="/home/tigergraph/tmp/{{ csv_file | basename }}"' + register: load_result + +- name: Output load result + debug: + msg: "{{ load_result.stdout }}" + +- name: Confirm data load + debug: + msg: "Data loaded successfully for {{ csv_file | basename }}" + when: "'Successfully' in load_result.stdout" diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/main.yml b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/main.yml new file mode 100644 index 0000000..4dcb993 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/main.yml @@ -0,0 +1,20 @@ +--- +- hosts: tigergraph_servers + become: true + become_user: tigergraph + become_method: sudo + tasks: + - name: Setup TigerGraph + include_tasks: setup_tigergraph.yml + + - name: Execute each GSQL script + include_tasks: execute_gsql.yml + loop: "{{ lookup('fileglob', 'gsql_scripts/*.gsql', wantlist=true) }}" + loop_control: + loop_var: gsql_file + + - name: Load data from each CSV file + include_tasks: load_data.yml + loop: "{{ lookup('fileglob', 'data_files/*.csv', wantlist=true) }}" + loop_control: + loop_var: csv_file diff --git a/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/setup_tigergraph.yml b/Config Management/Ansible Auto-Deploy/Ansible GSQL and Dataloading/setup_tigergraph.yml new file mode 100644 index 0000000..72b32cf --- /dev/null +++ b/Config Management/Ansible 
Auto-Deploy/Ansible GSQL and Dataloading/setup_tigergraph.yml @@ -0,0 +1,17 @@ +- name: Check TigerGraph services (verbose status, errors ignored) + become: true + become_user: tigergraph + become_method: sudo + shell: gadmin status -v + ignore_errors: true + +- name: Check TigerGraph status + become: true + become_user: tigergraph + become_method: sudo + shell: gadmin status + register: tg_status + +- name: Output status + debug: + msg: "{{ tg_status.stdout }}"
diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/Archive.zip b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/Archive.zip new file mode 100644 index 0000000..a2b672d Binary files /dev/null and b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/Archive.zip differ
diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/ansible.cfg b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/ansible.cfg new file mode 100644 index 0000000..04132c4 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/ansible.cfg @@ -0,0 +1,5 @@ +[defaults] +inventory = ./inventory.ini +host_key_checking = False +remote_tmp = /home/tigergraph/tigergraph/ansible-tmp +pipelining = True \ No newline at end of file
diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/configs/enable_configs.yml b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/configs/enable_configs.yml new file mode 100644 index 0000000..93ab6a6 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/configs/enable_configs.yml @@ -0,0 +1,11 @@ +enable_configs: + - name: GSQL.UDF.EnablePutExpr + value: "true" + - name: RESTPP.Factory.EnableAuth + value: "false" + - name: GSQL.UDF.EnablePutTgExpr + value: "true" + - name: GSQL.UDF.EnablePutTokenBank + value: "true" + - name: GSQL.UDF.Policy.Enable + value: "false"
diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/configs/other_configs.yml b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/configs/other_configs.yml new file mode 100644 index 0000000..c244b93 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/configs/other_configs.yml @@ -0,0 +1,7 @@ +other_configs: + - name: Admin.BasicConfig.LogConfig.LogFileMaxDurationDay + value: 50 + - name: Admin.BasicConfig.LogConfig.LogFileMaxSizeMB + value: 100 + - name: RESTPP.Factory.DefaultQueryTimeoutSec + value: 50000 \ No newline at end of file
diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/inventory.ini b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/inventory.ini new file mode 100644 index 0000000..1f7a74e --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/inventory.ini @@ -0,0 +1,2 @@ +[tigergraph] +server1 ansible_host=IPofHost ansible_user=graphsql ansible_ssh_private_key_file=.pemkey
diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/playbook.yml b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/playbook.yml new file mode 100644 index 0000000..a6cd967 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/playbook.yml @@ -0,0 +1,57 @@ +--- +- name: Configure TigerGraph + hosts: tigergraph + become: yes + become_user: tigergraph + vars_files: + - configs/enable_configs.yml + - configs/other_configs.yml + + tasks: + - name: Debug PATH + shell: echo $PATH + register: path_output + + - debug: + var: path_output.stdout + + - name: Check and set enable
configurations + shell: | + export PATH=$PATH:/home/tigergraph/tigergraph/app/cmd + current_value=$(gadmin config get {{ item.name }}) + if [ "$current_value" != "{{ item.value }}" ]; then + gadmin config set {{ item.name }} {{ item.value }} + echo 'changed' + else + echo 'no change' + fi + with_items: "{{ enable_configs }}" + register: set_enable_configs + changed_when: "'changed' in set_enable_configs.stdout_lines" + + - name: Check and set other configurations + shell: | + export PATH=$PATH:/home/tigergraph/tigergraph/app/cmd + current_value=$(gadmin config get {{ item.name }}) + if [ "$current_value" != "{{ item.value }}" ]; then + gadmin config set {{ item.name }} {{ item.value }} + echo 'changed' + else + echo 'no change' + fi + with_items: "{{ other_configs }}" + register: set_other_configs + changed_when: "'changed' in set_other_configs.stdout_lines" + + - name: Apply configuration changes if needed + shell: | + export PATH=$PATH:/home/tigergraph/tigergraph/app/cmd + echo 'y' | gadmin config apply + when: set_enable_configs.changed or set_other_configs.changed + register: apply_configs + + - name: Restart services if configuration was applied + shell: | + export PATH=$PATH:/home/tigergraph/tigergraph/app/cmd + gadmin restart all -y + when: apply_configs.changed diff --git a/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/readme.md b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/readme.md new file mode 100644 index 0000000..6f59890 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/GSQL_Config_Management/readme.md @@ -0,0 +1,61 @@ +TigerGraph Configuration Management +This project utilizes Ansible to automate the configuration management of TigerGraph instances. + +Table of Contents +Overview +Requirements +Installation +Usage +Configuration +Tasks +License +Overview +This project configures various settings for TigerGraph using Ansible playbooks. It aims to ensure that the desired configurations are applied consistently across multiple TigerGraph instances. + +Requirements +Ansible (version 2.10 or higher) +SSH access to the TigerGraph server +Appropriate user privileges +Installation + +Install Ansible: + + +pip install ansible +Usage +Run the Ansible playbook to apply configurations: + + +ansible-playbook -i inventory.ini playbook.yml + +To run Ansible in check mode, you can use the --check flag when running a playbook. Here's an example command: + +ansible-playbook playbook.yml --check +This command will run the specified playbook in check mode, showing you what changes would occur without actually applying them. + +Additionally, if you want to see the detailed differences of what would be changed, you can combine --check with the --diff flag: + + +ansible-playbook playbook.yml --check --diff +This will show the differences between the current state and the state that would be achieved by running the playbook, which is especially useful for configuration management tasks. + + + + + +Configuration +The configurations are defined in YAML files located in the configs directory: + +enable_configs.yml: Contains settings that are to be enabled. +other_configs.yml: Contains additional configurations. +Tasks +The playbook performs the following tasks: + +Debug the current PATH environment variable. +Check and set enable configurations. +Check and set other configurations. +Apply configuration changes if needed. +Restart services if configurations were applied. 
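Example
For reference, each configuration task in the playbook boils down to the following gadmin commands, run as the tigergraph user (a sketch; the config name comes from configs/enable_configs.yml, and gadmin is assumed to live under /home/tigergraph/tigergraph/app/cmd as the playbook's PATH export implies):

gadmin config get GSQL.UDF.EnablePutExpr
gadmin config set GSQL.UDF.EnablePutExpr true
echo 'y' | gadmin config apply
gadmin restart all -y

The apply and restart steps only run when at least one get/set pair reported a change, which is what keeps repeated runs of the playbook idempotent.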
diff --git a/Config Management/Ansible Auto-Deploy/Readme.md b/Config Management/Ansible Auto-Deploy/Readme.md new file mode 100644 index 0000000..3c7560d --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/Readme.md @@ -0,0 +1,40 @@ +# TigerGraph Ansible Installation + +This project contains Ansible playbooks for automating the installation and configuration of TigerGraph on remote servers. It simplifies the process of setting up TigerGraph environments, ensuring consistency across installations. + +## Prerequisites + +Before you begin, ensure you have the following: +- Ansible installed on your control machine. +- Access to the target server(s) with SSH. +- The target server(s) are running a supported Linux distribution. +- You have the necessary permissions to execute commands as `sudo` on the target server(s). + +## Getting Started + +1. **Clone this repository** to your local machine or control node where Ansible is installed. + + ```bash + git clone + cd + ``` + +2. **Set up your inventory** by editing the `hosts.ini` file. Replace the placeholder values with your server's details. + + ```ini + [tigergraph_servers] + server1 ansible_host=your_server_ip ansible_user=your_user ansible_ssh_private_key_file=path_to_your_pem_file + ``` + +3. **Review the playbook** `install_tigergraph_3cluster.yml` (or `install_tigergraph_localhost.yml` for a single-node install) to ensure it matches your installation requirements. You may need to adjust the TigerGraph version or installation parameters. + +## Running the Playbook + +To install TigerGraph on your target server(s), run the following command: + +```bash +ansible-playbook -i hosts.ini install_tigergraph_3cluster.yml +# add -vvv for verbose output: +ansible-playbook -vvv -i hosts.ini install_tigergraph_3cluster.yml +```
diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/configure_tigergraph.sh b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/configure_tigergraph.sh new file mode 100644 index 0000000..c29141f --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/configure_tigergraph.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Set variables +TG_HOST="http://YOUR_TG_HOST:9000" +TG_TOKEN="YOUR_TG_TOKEN" + +# Enable V2 data loading +curl -X PUT -H "Authorization: Bearer $TG_TOKEN" -H "Content-Type: application/json" -d '{"dataLoadingV2": true}' "$TG_HOST/graph/settings" + +# Create Kafka data source +curl -X POST -H "Authorization: Bearer $TG_TOKEN" -H "Content-Type: application/json" -d '{ + "name": "kafka_source", + "brokerURL": "YOUR_KAFKA_BROKER_URL", + "topic": "my_topic", + "format": "json", + "kafkaConfig": { + "group.id": "tg_consumer_group", + "enable.auto.commit": "false", + "auto.offset.reset": "earliest" + } +}' "$TG_HOST/restpp/datasource" + +# Create a loading job +curl -X POST -H "Authorization: Bearer $TG_TOKEN" -H "Content-Type: application/json" -d '{ + "job": "CREATE LOADING JOB load_from_kafka FOR GRAPH my_graph { + DEFINE FILENAME kafka_data = \"kafka://kafka_source\"; + LOAD kafka_data TO VERTEX Person VALUES ($\"person_id\", $\"name\", $\"age\") + USING SEPARATOR=\",\", HEADER=\"true\", EOL=\"LF\"; + LOAD kafka_data TO EDGE Knows VALUES ($\"person_id\", $\"friend_id\") + USING SEPARATOR=\",\", HEADER=\"true\", EOL=\"LF\"; + }" +}' "$TG_HOST/ddl" + +# Configure CDC to send data back to Kafka +curl -X POST -H "Authorization: Bearer $TG_TOKEN" -H "Content-Type: application/json" -d '{ + "graph": "my_graph", + "config": { + "kafka": { + "brokerURL": "YOUR_KAFKA_BROKER_URL", + "topic": "processed_data_topic" + } + } +}' "$TG_HOST/restpp/datasource/cdc" + +echo "TigerGraph configuration completed."
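The script above assumes TG_TOKEN already holds a valid REST++ bearer token. As a hedged sketch (the exact endpoint and payload vary across TigerGraph versions, so verify against your server's REST++ documentation), a token can be requested from a secret created beforehand with GSQL's CREATE SECRET:

# YOUR_SECRET is a placeholder for a secret created via 'CREATE SECRET' in GSQL
curl -X POST "$TG_HOST/requesttoken" -d '{"secret": "YOUR_SECRET", "lifetime": "3600"}'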
diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/deploy_kafka_zookeeper.yml b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/deploy_kafka_zookeeper.yml new file mode 100644 index 0000000..da8641d --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/deploy_kafka_zookeeper.yml @@ -0,0 +1,69 @@ +--- +- hosts: kafka_servers + become: true + vars: + kafka_version: '3.0.0' + kafka_install_dir: '/opt/kafka' + kafka_data_dir: '/var/lib/kafka' + zookeeper_data_dir: '/var/lib/zookeeper' + kafka_port: 9092 + zookeeper_port: 2181 + + tasks: + - name: Install Java (required for Kafka and Zookeeper) + apt: + name: openjdk-11-jdk + state: present + + - name: Download Kafka + get_url: + url: "https://downloads.apache.org/kafka/{{ kafka_version }}/kafka_2.13-{{ kafka_version }}.tgz" + dest: "/tmp/kafka.tgz" + + - name: Create Kafka install directory (unarchive requires an existing dest) + file: + path: "{{ kafka_install_dir }}" + state: directory + + - name: Extract Kafka (strip the top-level directory so bin/ and config/ land directly in the install dir) + unarchive: + src: "/tmp/kafka.tgz" + dest: "{{ kafka_install_dir }}" + remote_src: true + extra_opts: + - --strip-components=1 + creates: "{{ kafka_install_dir }}/bin/kafka-server-start.sh" + + - name: Create Kafka and Zookeeper data directories + file: + path: "{{ item }}" + state: directory + loop: + - "{{ kafka_data_dir }}" + - "{{ zookeeper_data_dir }}" + + - name: Configure Zookeeper + copy: + dest: "{{ kafka_install_dir }}/config/zookeeper.properties" + content: | + dataDir={{ zookeeper_data_dir }} + clientPort={{ zookeeper_port }} + + - name: Configure Kafka + copy: + dest: "{{ kafka_install_dir }}/config/server.properties" + content: | + broker.id=0 + listeners=PLAINTEXT://:{{ kafka_port }} + log.dirs={{ kafka_data_dir }} + zookeeper.connect=localhost:{{ zookeeper_port }} + + - name: Start Zookeeper + shell: "{{ kafka_install_dir }}/bin/zookeeper-server-start.sh -daemon {{ kafka_install_dir }}/config/zookeeper.properties" + args: + chdir: "{{ kafka_install_dir }}" + + - name: Start Kafka + shell: "{{ kafka_install_dir }}/bin/kafka-server-start.sh -daemon {{ kafka_install_dir }}/config/server.properties" + args: + chdir: "{{ kafka_install_dir }}"
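A minimal inventory sketch for this playbook, mirroring the hosts.ini convention used elsewhere in this repo (hostname, user, and key path are placeholders):

[kafka_servers]
kafka1 ansible_host=hostname ansible_user=ubuntu ansible_ssh_private_key_file=/path/to/key.pem

ansible-playbook -i hosts.ini deploy_kafka_zookeeper.yml

Note that the Java task uses apt, so the playbook as written assumes Debian-family hosts; RedHat-family servers would need a yum/dnf equivalent.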
diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/Dockerfile b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/Dockerfile new file mode 100644 index 0000000..1709d64 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/Dockerfile @@ -0,0 +1,45 @@ +# Start with a base image for Kafka +FROM bitnami/kafka:latest + +# Switch to root user to install dependencies +USER root + +# Install dependencies for Kafdrop +RUN apt-get update && \ + apt-get install -y wget openjdk-17-jre-headless --fix-missing && \ + apt-get clean + +# Download and set up Kafdrop +RUN wget https://github.com/obsidiandynamics/kafdrop/releases/download/3.29.0/kafdrop-3.29.0.jar -O /kafdrop.jar + +# Expose Kafka and Kafdrop ports +EXPOSE 9092 9000 + +# Environment variables for Kafka +ENV KAFKA_BROKER_ID=1 +ENV KAFKA_LISTENERS=PLAINTEXT://:9092 +ENV KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 +ENV KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 +ENV ALLOW_PLAINTEXT_LISTENER=yes + +# Create the start script inside the Dockerfile with root permissions +RUN echo "#!/bin/bash\n\ +# Start ZooKeeper\n\ +/opt/bitnami/zookeeper/bin/zkServer.sh start &\n\ +# Wait for ZooKeeper to start\n\ +sleep 10\n\ +# Start Kafka\n\ +/opt/bitnami/kafka/bin/kafka-server-start.sh /opt/bitnami/kafka/config/server.properties &\n\ +# Wait for Kafka to start\n\ +sleep 20\n\ +# Start Kafdrop\n\ +java -jar /kafdrop.jar --kafka.brokerConnect=localhost:9092 --server.port=9000\n\ +# Keep container running\n\ +tail -f /dev/null\n" > /start-services.sh && \ +chmod +x /start-services.sh + +# Switch back to the default user +USER 1001 + +# Start Kafka and Kafdrop +CMD ["/bin/bash", "/start-services.sh"]
diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/docker-compose.yml b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/docker-compose.yml new file mode 100644 index 0000000..a383e3b --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/docker-compose.yml @@ -0,0 +1,31 @@ +version: '3.8' + +services: + zookeeper: + image: bitnami/zookeeper:latest + environment: + - ALLOW_ANONYMOUS_LOGIN=yes + ports: + - "2181:2181" + + kafka: + image: bitnami/kafka:latest + environment: + - KAFKA_BROKER_ID=1 + - KAFKA_LISTENERS=PLAINTEXT://:9092 + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092 + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - ALLOW_PLAINTEXT_LISTENER=yes + ports: + - "9092:9092" + depends_on: + - zookeeper + + kafdrop: + image: obsidiandynamics/kafdrop:latest + ports: + - "9001:9000" + environment: + - KAFKA_BROKERCONNECT=kafka:9092 + depends_on: + - kafka
diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/readme.md b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/readme.md new file mode 100644 index 0000000..2dc244f --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/docker_ex_kafka_gui/readme.md @@ -0,0 +1,12 @@ +Instructions +Save the Dockerfile: copy the above Dockerfile content into a file named Dockerfile. + +Build the Docker image: run the following command in the directory containing the Dockerfile: + +docker build -t kafka-kafdrop . + +Run the Docker container: start the container with: + +docker run -p 9092:9092 -p 9001:9000 kafka-kafdrop + +This setup will automatically create and run a script to start ZooKeeper, Kafka, and Kafdrop. Kafka will be accessible on port 9092, and the Kafdrop GUI will be accessible at http://localhost:9001 (host port 9001 maps to container port 9000). This allows you to connect your TigerGraph CDC directly to Kafka and manage it through the Kafdrop web interface.
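To smoke-test the broker from inside the container (assuming the bitnami image layout, where the Kafka CLI tools live under /opt/bitnami/kafka/bin), create and list a topic:

docker exec -it <container-id> /opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --create --topic my_topic
docker exec -it <container-id> /opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --list

The topic should then also be visible in the Kafdrop UI at http://localhost:9001.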
+ + +DOCKER COMPOSE: +docker-compose up diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/send_data_to_kafka.sh b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/send_data_to_kafka.sh new file mode 100644 index 0000000..6571fc9 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/send_data_to_kafka.sh @@ -0,0 +1,37 @@ +from kafka import KafkaProducer +import json +import time + +# Kafka Configuration +KAFKA_BROKER = 'localhost:9092' +TOPIC = 'my_topic' + +# Create Kafka producer +producer = KafkaProducer( + bootstrap_servers=KAFKA_BROKER, + value_serializer=lambda v: json.dumps(v).encode('utf-8') +) + +def get_data(): + # Example data structure matching TigerGraph loading job schema + return { + 'person_id': '123', + 'name': 'Alice', + 'age': 30, + 'friend_id': '456' + } + +def monitor_kafka_size(topic): + # Placeholder function for monitoring Kafka size + return False # Implement actual logic for checking size + +while True: + if not monitor_kafka_size(TOPIC): + data = get_data() + producer.send(TOPIC, value=data) + print(f'Sent: {data}') + else: + print('Kafka topic size limit reached, waiting...') + time.sleep(10) + + time.sleep(1) # Adjust rate of sending messages as needed diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/loading_job.gsql b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/loading_job.gsql new file mode 100644 index 0000000..5068d73 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/loading_job.gsql @@ -0,0 +1,79 @@ +# 1. Use graph +USE GRAPH AntiFraud + +# 2. Create loading job +SET sys.data_root="mock_data" +CREATE LOADING JOB loading_job FOR GRAPH AntiFraud { + DEFINE FILENAME file_individual="$sys.data_root/Individual_vertices.csv"; + LOAD file_individual TO VERTEX Individual VALUES($0, $1, $2, $3, $4, $5) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_company="$sys.data_root/Company_vertices.csv"; + LOAD file_company TO VERTEX Company VALUES($0, $1, $2, $3) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_beneficialowner="$sys.data_root/BeneficialOwner_vertices.csv"; + LOAD file_beneficialowner TO VERTEX BeneficialOwner VALUES($0, $1, $2, $3) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_merchant="$sys.data_root/Merchant_vertices.csv"; + LOAD file_merchant TO VERTEX Merchant VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_account="$sys.data_root/Account_vertices.csv"; + LOAD file_account TO VERTEX Account VALUES($0, $1, $2, $3, $4, $5, $6) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_bank="$sys.data_root/Bank_vertices.csv"; + LOAD file_bank TO VERTEX Bank VALUES($0, $1, $2, $3) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_sar="$sys.data_root/SAR_vertices.csv"; + LOAD file_sar TO VERTEX SAR VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_creditcard="$sys.data_root/CreditCard_vertices.csv"; + LOAD file_creditcard TO VERTEX CreditCard VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_debitcard="$sys.data_root/DebitCard_vertices.csv"; + LOAD file_debitcard TO VERTEX DebitCard VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_transaction="$sys.data_root/Transaction_vertices.csv"; + LOAD file_transaction TO VERTEX Transaction VALUES($0, $1, $2, $3) USING 
SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_acct_has_bank="$sys.data_root/ACCT_HAS_BANK_edges.csv"; + LOAD file_acct_has_bank TO EDGE ACCT_HAS_BANK VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_cust_has_acct="$sys.data_root/CUST_HAS_ACCT_edges.csv"; + LOAD file_cust_has_acct TO EDGE CUST_HAS_ACCT VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_merchatn_has_acct="$sys.data_root/MERCHATN_HAS_ACCT_edges.csv"; + LOAD file_merchatn_has_acct TO EDGE MERCHATN_HAS_ACCT VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_bo_has_acct="$sys.data_root/BO_HAS_ACCT_edges.csv"; + LOAD file_bo_has_acct TO EDGE BO_HAS_ACCT VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_has_name="$sys.data_root/HAS_NAME_edges.csv"; + LOAD file_has_name TO EDGE HAS_NAME VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_has_dob="$sys.data_root/HAS_DOB_edges.csv"; + LOAD file_has_dob TO EDGE HAS_DOB VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_has_phone="$sys.data_root/HAS_PHONE_edges.csv"; + LOAD file_has_phone TO EDGE HAS_PHONE VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_has_email="$sys.data_root/HAS_EMAIL_edges.csv"; + LOAD file_has_email TO EDGE HAS_EMAIL VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_has_ssn="$sys.data_root/HAS_SSN_edges.csv"; + LOAD file_has_ssn TO EDGE HAS_SSN VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_has_address="$sys.data_root/HAS_ADDRESS_edges.csv"; + LOAD file_has_address TO EDGE HAS_ADDRESS VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_acct_uses_ip="$sys.data_root/ACCT_USES_IP_edges.csv"; + LOAD file_acct_uses_ip TO EDGE ACCT_USES_IP VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_acct_uses_dev="$sys.data_root/ACCT_USES_DEV_edges.csv"; + LOAD file_acct_uses_dev TO EDGE ACCT_USES_DEV VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_acct_has_card="$sys.data_root/ACCT_HAS_CARD_edges.csv"; + LOAD file_acct_has_card TO EDGE ACCT_HAS_CARD VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_tx_has_channel="$sys.data_root/TX_HAS_CHANNEL_edges.csv"; + LOAD file_tx_has_channel TO EDGE TX_HAS_CHANNEL VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_send="$sys.data_root/SEND_edges.csv"; + LOAD file_send TO EDGE SEND VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_receive="$sys.data_root/RECEIVE_edges.csv"; + LOAD file_receive TO EDGE RECEIVE VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_tx_has_merchant="$sys.data_root/TX_HAS_MERCHANT_edges.csv"; + LOAD file_tx_has_merchant TO EDGE TX_HAS_MERCHANT VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_tx_use_card="$sys.data_root/TX_USE_CARD_edges.csv"; + LOAD file_tx_use_card TO EDGE TX_USE_CARD VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_card_receive="$sys.data_root/CARD_RECEIVE_edges.csv"; + LOAD file_card_receive TO EDGE CARD_RECEIVE VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_account_has_incident="$sys.data_root/Account_HAS_INCIDENT_edges.csv"; + LOAD 
file_account_has_incident TO EDGE Account_HAS_INCIDENT VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_acct_has_address="$sys.data_root/ACCT_HAS_Address_edges.csv"; + LOAD file_acct_has_address TO EDGE ACCT_HAS_Address VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_is_located_in="$sys.data_root/IS_LOCATED_IN_edges.csv"; + LOAD file_is_located_in TO EDGE IS_LOCATED_IN VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; + DEFINE FILENAME file_cust_has_community="$sys.data_root/CUST_HAS_COMMUNITY_edges.csv"; + LOAD file_cust_has_community TO EDGE CUST_HAS_COMMUNITY VALUES($0, $1) USING SEPARATOR=",", HEADER="true", EOL="\n"; +} + +# 3. Run loading job +RUN LOADING JOB loading_job + +# 4. Drop loading job +DROP JOB loading_job diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/schema.txt b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/schema.txt new file mode 100644 index 0000000..7b76c61 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/schema.txt @@ -0,0 +1,49 @@ +CREATE SCHEMA_CHANGE JOB change_schema_of_AntiFraud FOR GRAPH AntiFraud { + + Add VERTEX Individual(PRIMARY_ID id UINT, name STRING, isBlocked BOOL, createTime DATETIME, gender STRING, dob DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Company(PRIMARY_ID id UINT, name STRING, isBlocked BOOL, createTime DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX BeneficialOwner(PRIMARY_ID cif STRING, nationality STRING, gender STRING, createTime DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Merchant(PRIMARY_ID mid STRING, type_of_merchant STRING, createTime DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Account(PRIMARY_ID id UINT, createTime DATETIME, isBlocked BOOL, accountType STRING, phoneNumber STRING, email STRING, accountLevel STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Bank(PRIMARY_ID bank_id STRING, bank_name STRING, swift_code STRING, createTime DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Channel(PRIMARY_ID channel STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Name(PRIMARY_ID name STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX DoB(PRIMARY_ID dob DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Phone(PRIMARY_ID phone_number STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Email(PRIMARY_ID email STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX SSN(PRIMARY_ID ssn STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Community(PRIMARY_ID cid UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Address(PRIMARY_ID address STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX IP(PRIMARY_ID id STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Device(PRIMARY_ID id STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX SAR(PRIMARY_ID id STRING, report_type STRING, report_time DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", 
PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX CreditCard(PRIMARY_ID card_number INT, cm_curr_balance DOUBLE, createTime DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX DebitCard(PRIMARY_ID card_number INT, cm_curr_balance DOUBLE, createTime DATETIME) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Transaction(PRIMARY_ID id STRING, createTime DATETIME, amount DOUBLE, goodsType STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX City(PRIMARY_ID city STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + Add VERTEX Country(PRIMARY_ID country STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + + Add UNDIRECTED EDGE ACCT_HAS_BANK(FROM Account, TO Bank); + Add UNDIRECTED EDGE CUST_HAS_ACCT(FROM Company, TO Account|FROM Individual, TO Account); + Add UNDIRECTED EDGE MERCHATN_HAS_ACCT(FROM Merchant, TO Account); + Add UNDIRECTED EDGE BO_HAS_ACCT(FROM BeneficialOwner, TO Account); + Add UNDIRECTED EDGE HAS_NAME(FROM Account, TO Name|FROM Company, TO Name|FROM Individual, TO Name|FROM BeneficialOwner, TO Name); + Add UNDIRECTED EDGE HAS_DOB(FROM Account, TO DoB|FROM Company, TO DoB|FROM Individual, TO DoB|FROM BeneficialOwner, TO DoB); + Add UNDIRECTED EDGE HAS_PHONE(FROM Account, TO Phone|FROM Company, TO Phone|FROM Individual, TO Phone|FROM BeneficialOwner, TO Phone); + Add UNDIRECTED EDGE HAS_EMAIL(FROM Account, TO Email|FROM Company, TO Email|FROM Individual, TO Email|FROM BeneficialOwner, TO Email); + Add UNDIRECTED EDGE HAS_SSN(FROM Account, TO SSN|FROM Company, TO SSN|FROM Individual, TO SSN|FROM BeneficialOwner, TO SSN); + Add UNDIRECTED EDGE HAS_ADDRESS(FROM Company, TO Address|FROM Individual, TO Address|FROM BeneficialOwner, TO Address); + Add UNDIRECTED EDGE ACCT_USES_IP(FROM Account, TO IP); + Add UNDIRECTED EDGE ACCT_USES_DEV(FROM Account, TO Device); + Add UNDIRECTED EDGE ACCT_HAS_CARD(FROM Account, TO DebitCard|FROM Account, TO CreditCard); + Add UNDIRECTED EDGE TX_HAS_CHANNEL(FROM Transaction, TO Channel); + Add UNDIRECTED EDGE SEND(FROM Account, TO Transaction); + Add UNDIRECTED EDGE RECEIVE(FROM Account, TO Transaction); + Add UNDIRECTED EDGE TX_HAS_MERCHANT(FROM Transaction, TO Merchant); + Add UNDIRECTED EDGE TX_USE_CARD(FROM Transaction, TO DebitCard|FROM Transaction, TO CreditCard); + Add UNDIRECTED EDGE CARD_RECEIVE(FROM DebitCard, TO Transaction|FROM CreditCard, TO Transaction); + Add UNDIRECTED EDGE Account_HAS_INCIDENT(FROM SAR, TO Account); + Add UNDIRECTED EDGE ACCT_HAS_Address(FROM Account, TO Address); + Add UNDIRECTED EDGE IS_LOCATED_IN(FROM Address, TO City|FROM City, TO Country); + Add UNDIRECTED EDGE CUST_HAS_COMMUNITY(FROM Company, TO Community|FROM Individual, TO Community); +} \ No newline at end of file diff --git a/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/testschemacreationlocal.py b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/testschemacreationlocal.py new file mode 100644 index 0000000..fc54852 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/dl from kafka with cdc/test/testschemacreationlocal.py @@ -0,0 +1,162 @@ +import csv +import random +import string +import os +import re + +# Configuration +SCHEMA_FILE = 'schema.txt' # Schema definition file +OUTPUT_DIR = 'output' # Directory where CSV files will be saved +LOADING_JOB_FILE = 'loading_job.gsql' # Output file for the loading job +GRAPH_NAME = 'AntiFraud' # Graph name for which the loading 
job is created + +def parse_schema(schema_file): + """Parse the schema definition from a text file.""" + schema = {"VertexTypes": [], "EdgeTypes": []} + + with open(schema_file, 'r') as file: + content = file.read() + + # Regular expressions to match vertex and edge definitions + vertex_pattern = re.compile(r'Add VERTEX (\w+)\(PRIMARY_ID (\w+) (\w+), (.+?)\) WITH', re.DOTALL) + edge_pattern = re.compile(r'Add UNDIRECTED EDGE (\w+)\(FROM (\w+), TO (\w+).*\);') + + # Extract vertices + for match in vertex_pattern.finditer(content): + vertex_name = match.group(1) + primary_id_name = match.group(2) + primary_id_type = match.group(3) + attributes_str = match.group(4) + + attributes = [{"AttributeName": primary_id_name, "AttributeType": primary_id_type}] + for attr in attributes_str.split(','): + attr = attr.strip() + attr_name, attr_type = attr.split()[:2] + attributes.append({"AttributeName": attr_name, "AttributeType": attr_type}) + + schema["VertexTypes"].append({"Name": vertex_name, "Attributes": attributes}) + + # Extract edges + for match in edge_pattern.finditer(content): + edge_name = match.group(1) + from_vertex = match.group(2) + to_vertex = match.group(3) + + schema["EdgeTypes"].append({"Name": edge_name, "From": from_vertex, "To": to_vertex, "Attributes": []}) + + return schema + +def generate_random_value(data_type): + """Generate a random value based on attribute data type.""" + if data_type == 'STRING': + return generate_random_string() + elif data_type in ['UINT', 'INT']: + return random.randint(1, 10000) + elif data_type == 'FLOAT': + return round(random.uniform(1.0, 100.0), 2) + elif data_type == 'DOUBLE': + return round(random.uniform(1.0, 10000.0), 2) + elif data_type == 'BOOL': + return random.choice([True, False]) + elif data_type == 'DATETIME': + return '2024-09-05T12:00:00' # Example date-time; can be randomized + else: + return None + +def generate_random_string(length=8): + """Generate a random string of fixed length.""" + return ''.join(random.choices(string.ascii_letters + string.digits, k=length)) + +def generate_data_from_schema(schema): + """Generate data entries based on the parsed schema and write to CSV.""" + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + + for vertex in schema.get('VertexTypes', []): + vertex_file = os.path.join(OUTPUT_DIR, f"{vertex['Name']}_vertices.csv") + with open(vertex_file, mode='w', newline='') as file: + writer = csv.writer(file) + headers = [attr['AttributeName'] for attr in vertex['Attributes']] + writer.writerow(headers) + + for _ in range(10): # Generate 10 sample records for testing + row = [generate_random_value(attr['AttributeType']) for attr in vertex['Attributes']] + writer.writerow(row) + print(f"Generated data for vertex '{vertex['Name']}' written to {vertex_file}") + + for edge in schema.get('EdgeTypes', []): + edge_file = os.path.join(OUTPUT_DIR, f"{edge['Name']}_edges.csv") + with open(edge_file, mode='w', newline='') as file: + writer = csv.writer(file) + headers = ['from_id', 'to_id'] # Source and target IDs for edges + writer.writerow(headers) + + for _ in range(10): # Generate 10 sample records for testing + row = [generate_random_string(10), generate_random_string(10)] + writer.writerow(row) + print(f"Generated data for edge '{edge['Name']}' written to {edge_file}") + +def create_loading_job_file(schema): + """Create a loading job file based on the generated data and schema.""" + with open(LOADING_JOB_FILE, 'w') as file: + # Step 1: Use graph + file.write(f'# 1. 
Use graph\n') + file.write(f'USE GRAPH {GRAPH_NAME}\n\n') + + # Step 2: Create loading job + file.write(f'# 2. Create loading job\n') + file.write('SET sys.data_root="mock_data"\n') + file.write(f'CREATE LOADING JOB loading_job FOR GRAPH {GRAPH_NAME} {{\n') + + # Define filenames and load statements for each CSV file + for vertex in schema.get('VertexTypes', []): + filename_var = f'file_{vertex["Name"].lower()}' + file_path = f'$sys.data_root/{vertex["Name"]}_vertices.csv' + + # Define the file variable + file.write(f' DEFINE FILENAME {filename_var}="{file_path}";\n') + + # Generate the LOAD statement + column_list = ', '.join(f'${i}' for i in range(len(vertex['Attributes']))) + load_statement = f' LOAD {filename_var} TO VERTEX {vertex["Name"]} VALUES({column_list}) USING SEPARATOR=",", HEADER="true", EOL="\\n";\n' + + file.write(load_statement) + + for edge in schema.get('EdgeTypes', []): + filename_var = f'file_{edge["Name"].lower()}' + file_path = f'$sys.data_root/{edge["Name"]}_edges.csv' + + # Define the file variable + file.write(f' DEFINE FILENAME {filename_var}="{file_path}";\n') + + # Generate the LOAD statement + column_list = ', '.join(f'${i}' for i in range(2)) # Assuming 'from_id' and 'to_id' + load_statement = f' LOAD {filename_var} TO EDGE {edge["Name"]} VALUES({column_list}) USING SEPARATOR=",", HEADER="true", EOL="\\n";\n' + + file.write(load_statement) + + # Close the loading job definition + file.write('}\n\n') + + # Step 3: Run loading job + file.write('# 3. Run loading job\n') + file.write('RUN LOADING JOB loading_job\n\n') + + # Step 4: Drop loading job + file.write('# 4. Drop loading job\n') + file.write('DROP JOB loading_job\n') + + print(f"Loading job file '{LOADING_JOB_FILE}' created successfully.") + +def main(): + # Parse the schema from the schema file + schema = parse_schema(SCHEMA_FILE) + + # Generate data based on the parsed schema + generate_data_from_schema(schema) + + # Create the loading job file + create_loading_job_file(schema) + +if __name__ == "__main__": + main() diff --git a/Config Management/Ansible Auto-Deploy/single node localhost/hosts.ini b/Config Management/Ansible Auto-Deploy/single node localhost/hosts.ini new file mode 100644 index 0000000..56dc622 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/single node localhost/hosts.ini @@ -0,0 +1,2 @@ +[tigergraph_servers] +server1 ansible_host=hostname ansible_user=graphsql ansible_ssh_private_key_file=/Users/samuel.skidmore/Desktop/qebot.pem diff --git a/Config Management/Ansible Auto-Deploy/single node localhost/install_conf.json b/Config Management/Ansible Auto-Deploy/single node localhost/install_conf.json new file mode 100644 index 0000000..9bd0785 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/single node localhost/install_conf.json @@ -0,0 +1,41 @@ +{ + "BasicConfig": { + "TigerGraph": { + "Username": "tigergraph", + "[comment]":"Provide password for tigergraph user, if the user already exists, we won't change the password. If the password is empty, we will set it to default value 'tigergraph'.", + "[comment]": "TigerGraph does not support passwords with special characters such as '$'. 
Please change your password if it contains such special characters.", + "Password": "tigergraph", + "SSHPort": 22, + "[comment]":"(Optional)Provide valid private key file below to replace tigergraph.rsa and tigergraph.pub, which will be generated by default.", + "PrivateKeyFile": "", + "PublicKeyFile": "" + }, + "RootDir": { + "AppRoot": "/home/tigergraph/tigergraph/app", + "DataRoot": "/home/tigergraph/tigergraph/data", + "LogRoot": "/home/tigergraph/tigergraph/log", + "TempRoot": "/home/tigergraph/tigergraph/tmp" + }, + "License": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJJc3N1ZXIiOiJUaWdlckdyYXBoIEluYy4iLCJBdWRpZW5jZSI6IlRpZ2VyR3JhcGggRGV2ZWxvcGVyIiwiU3RhcnRUaW1lIjoxNzA3NDM1Mjk3LCJFbmRUaW1lIjoxNzA4NjQ4NDk3LCJJc3N1ZVRpbWUiOjE3MDc0Mzg4OTcsIkVkaXRpb24iOiJFbnRlcnByaXNlIiwiVmVyc2lvbiI6IkFsbCIsIkhvc3QiOnsiTWF4Q1BVQ29yZSI6MTAwMDAwMDAwMDAwMDAwMCwiTWF4UGh5c2ljYWxNZW1vcnlCeXRlcyI6MTAwMDAwMDAwMDAwMDAwMCwiQWxsb3dlZEhhcmR3YXJlU2lnbmF0dXJlcyI6W10sIkFsbG93ZWRPU1NpZ25hdHVyZXMiOltdLCJNYXhDbHVzdGVyTm9kZU51bWJlciI6MTAyNCwiQWxsb3dlZEF6dXJlSW5zdGFuY2VJRCI6W10sIkFsbG93ZWRHQ1BJbnN0YW5jZUlEIjpbXSwiQWxsb3dlZEFXU0luc3RhbmNlSUQiOltdfSwiVG9wb2xvZ3kiOnsiTWF4VmVydGV4TnVtYmVyIjoxMDAwMDAwMDAwMDAwMDAwLCJNYXhFZGdlTnVtYmVyIjoxMDAwMDAwMDAwMDAwMDAwLCJNYXhHcmFwaE51bWJlciI6MTAyNCwiTWF4VG9wb2xvZ3lCeXRlcyI6MTAwMDAwMDAwMDAwMDAwMH0sIkdQRSI6bnVsbCwiR1NFIjpudWxsLCJHU1QiOnsiRW5hYmxlIjp0cnVlfSwiTUxXQiI6bnVsbCwiSW5zaWdodHMiOnsiRW5hYmxlIjp0cnVlfSwiR1NRTFNoZWxsIjp7IkVuYWJsZSI6dHJ1ZX0sIkdyYXBoUUwiOnsiRW5hYmxlIjp0cnVlfSwiUnVudGltZU1lbW9yeSI6eyJQcm9jZXNzUlNTIjpudWxsLCJNYXhVc2VyUmVzaWRlbnRTZXRCeXRlcyI6MTAwMDAwMDAwMDAwMDAwMH0sIlBlbmFsdHkiOm51bGx9.C-6XLtLVm0XwjL9OivNIUj8s0BqHnMossVk_qsW4iE_HOxqR6MsUpyYo0QjnX3Z1ziOj2RUugxLnjXvOU--P4O8or21Ic7omR0QRi9OvZos0VAOsQ_3LvB6LlMHksh5XvIq1VDIJz0zDZHWt2s_KSMLG3TqDwG7VjOiOqdFfRIhQrpD2dCQ71iQEAbLS6l3_kIWv-_kJmSqYanVAZQ0agjzz8u5QBXwZ1B9yhlij0K5kr76kRpximSH1WDu5Ce6R0RbfWnvTXvhxEVAerWr9Yva3FPBeStd1Erwg8ucKqz3nk9l3PbKDhko5JGI0R4dXQZX8DyoWuHT-HYHrFyxIiw%", + "[comment]":"You can add more nodes by string 'node_id: IP', appending to the following json array. Otherwise, it installs single node locally by default.", + "NodeList": [ + "m1: 127.0.0.1" + ] + }, + "AdvancedConfig": { + "[comment]": "Keep the default ClusterConfig if installing locally", + "ClusterConfig": { + "[comment]": "All nodes must have the same login configurations", + "LoginConfig": { + "SudoUser": "sudoUserName", + "[comment]": "choose login method: 'P' for SSH using password or 'K' for SSH using key file (e.g. ec2_key.pem)", + "[comment]": "TigerGraph does not support passwords with special characters such as '$'. 
Please change your password if it contains such special characters.", + "Method": "P[or K]", + "P": "sudoUserPassword", + "K": "/path/to/my_key.pem_rsa" + }, + "[comment]": "To install a high-availability cluster, please specify the ReplicationFactor greater than 1", + "ReplicationFactor": 1 + } + } + } \ No newline at end of file
diff --git a/Config Management/Ansible Auto-Deploy/single node localhost/install_tigergraph_localhost.yml b/Config Management/Ansible Auto-Deploy/single node localhost/install_tigergraph_localhost.yml new file mode 100644 index 0000000..fbaead0 --- /dev/null +++ b/Config Management/Ansible Auto-Deploy/single node localhost/install_tigergraph_localhost.yml @@ -0,0 +1,82 @@ +- hosts: tigergraph_servers + become: yes + tasks: + - name: Update and upgrade apt packages + become: yes + apt: + update_cache: yes + upgrade: yes + when: ansible_os_family == "Debian" + + - name: Install prerequisites for CentOS/RedHat + yum: + name: + - tar + - curl + - cronie + - iproute + - util-linux-ng + - net-tools + - nc + - coreutils + - openssh-clients + - openssh-server + - sshpass + state: present + when: ansible_os_family == "RedHat" + + - name: Install prerequisites for Ubuntu/Debian + apt: + name: + - tar + - curl + - cron + - iproute2 + - util-linux + - net-tools + - netcat + - coreutils + - openssh-client + - openssh-server + - sshpass + state: present + when: ansible_os_family == "Debian" + + - name: Download TigerGraph installer + get_url: + url: "https://tigergraph-release-download.s3.us-west-1.amazonaws.com/enterprise-edition/tigergraph-3.9.3-offline.tar.gz?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAWNM34YTI5OWLYSEX%2F20240621%2Fus-west-1%2Fs3%2Faws4_request&X-Amz-Date=20240621T134344Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=84e090f4ee18b2d49f64eeba30787b1780392341008b2093c302b7ffccd9270c" + dest: /home/graphsql/tigergraphansible.tar.gz + mode: '0440' + + - name: Extract TigerGraph installer + unarchive: + src: /home/graphsql/tigergraphansible.tar.gz + dest: /home/graphsql/ + remote_src: yes + + - name: Find TigerGraph installation directory + find: + paths: "/home/graphsql" + file_type: directory + patterns: "tigergraph-*" + register: tg_dir + + - name: Copy custom install_conf.json to TigerGraph installation directory + copy: + src: ./install_conf.json + dest: "{{ tg_dir.files[0].path }}/install_conf.json" + + - name: Run TigerGraph installer non-interactively + shell: | + cd {{ tg_dir.files[0].path }} + sudo ./install.sh -n + args: + creates: "{{ tg_dir.files[0].path }}/tgdb/conf/tgdb.conf" + + - name: Cleanup installation files + file: + path: "{{ item }}" + state: absent + loop: + - /home/graphsql/tigergraphansible.tar.gz + - "{{ tg_dir.files[0].path }}"
diff --git a/Reademe.MD b/Reademe.MD new file mode 100644 index 0000000..d7c6a08 --- /dev/null +++ b/Reademe.MD @@ -0,0 +1,82 @@ +# TigerGraph Multi-Version Docker Setup + +This project uses Docker Compose to run multiple versions of TigerGraph (3.6, 3.7, 3.8, 3.9, 3.10, and 4.1) on different ports. Each version of TigerGraph has its own container, port forwarding, and data volume. + +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) +- [Docker Compose](https://docs.docker.com/compose/install/) + +Make sure both Docker and Docker Compose are installed and properly configured on your machine before proceeding. + +## Setup + +1. Clone this repository or copy the `docker-compose.yml` file to your working directory. + +2. Adjust the volume paths if needed.
By default, data is stored in subdirectories under `~/data/` (e.g., `~/data/tg36` for TigerGraph 3.6). + +3. Run the following command to start all the TigerGraph containers: + + ```bash + docker-compose up -d + ``` + +4. Verify the containers are running by executing: + + ```bash + docker ps + ``` + +## Port Forwarding + +Each TigerGraph version is mapped to different ports to avoid conflicts: + +- **TigerGraph 3.6** + - SSH: `14026 -> 22` + - UI: `9001 -> 9000` + - REST API: `14241 -> 14240` + +- **TigerGraph 3.7** + - SSH: `14027 -> 22` + - UI: `9002 -> 9000` + - REST API: `14242 -> 14240` + +- **TigerGraph 3.8** + - SSH: `14028 -> 22` + - UI: `9003 -> 9000` + - REST API: `14243 -> 14240` + +- **TigerGraph 3.9** + - SSH: `14029 -> 22` + - UI: `9004 -> 9000` + - REST API: `14244 -> 14240` + +- **TigerGraph 3.10** + - SSH: `14030 -> 22` + - UI: `9005 -> 9000` + - REST API: `14245 -> 14240` + +- **TigerGraph 4.1** + - SSH: `14031 -> 22` + - UI: `9006 -> 9000` + - REST API: `14246 -> 14240` + +You can access each version’s web UI by visiting `http://localhost:` where `` is the port number for the respective version (e.g., `http://localhost:9001` for TigerGraph 3.6). + +## Data Storage + +Each TigerGraph instance is configured with its own data storage to prevent interference between versions: + +- **TigerGraph 3.6**: Data stored at `~/data/tg36` +- **TigerGraph 3.7**: Data stored at `~/data/tg37` +- **TigerGraph 3.8**: Data stored at `~/data/tg38` +- **TigerGraph 3.9**: Data stored at `~/data/tg39` +- **TigerGraph 3.10**: Data stored at `~/data/tg310` +- **TigerGraph 4.1**: Data stored at `~/data/tg41` + +## Stopping and Restarting Containers + +To stop all containers, run: + +```bash +docker-compose down diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c9dc1e6 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,99 @@ +version: '3.8' +services: + tigergraph_3_6: + image: tigergraph/tigergraph:3.6 + container_name: tigergraph_3_6 + ports: + - "14026:22" + - "9001:9000" + - "14241:14240" + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + volumes: + - ~/data/tg36:/home/tigergraph/mydata + - tg-data-36:/home/tigergraph + + tigergraph_3_7: + image: tigergraph/tigergraph:3.7 + container_name: tigergraph_3_7 + ports: + - "14027:22" + - "9002:9000" + - "14242:14240" + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + volumes: + - ~/data/tg37:/home/tigergraph/mydata + - tg-data-37:/home/tigergraph + + tigergraph_3_8: + image: tigergraph/tigergraph:3.8 + container_name: tigergraph_3_8 + ports: + - "14028:22" + - "9003:9000" + - "14243:14240" + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + volumes: + - ~/data/tg38:/home/tigergraph/mydata + - tg-data-38:/home/tigergraph + + tigergraph_3_9: + image: tigergraph/tigergraph:3.9 + container_name: tigergraph_3_9 + ports: + - "14029:22" + - "9004:9000" + - "14244:14240" + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + volumes: + - ~/data/tg39:/home/tigergraph/mydata + - tg-data-39:/home/tigergraph + + tigergraph_3_10: + image: tigergraph/tigergraph:3.10 + container_name: tigergraph_3_10 + ports: + - "14030:22" + - "9005:9000" + - "14245:14240" + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + volumes: + - ~/data/tg310:/home/tigergraph/mydata + - tg-data-310:/home/tigergraph + + tigergraph_4_1: + image: tigergraph/tigergraph:4.1 + container_name: tigergraph_4_1 + ports: + - "14031:22" + - "9006:9000" + - "14246:14240" + ulimits: + nofile: + soft: 1000000 + hard: 1000000 + volumes: + - 
~/data/tg41:/home/tigergraph/mydata + - tg-data-41:/home/tigergraph + +volumes: + tg-data-36: + tg-data-37: + tg-data-38: + tg-data-39: + tg-data-310: + tg-data-41:
diff --git a/pytigergraph/Script/Dockerfile b/pytigergraph/Script/Dockerfile new file mode 100644 index 0000000..6a811ec --- /dev/null +++ b/pytigergraph/Script/Dockerfile @@ -0,0 +1,11 @@ +# Use the official Jupyter base image with Python 3 +FROM jupyter/base-notebook + +# Install pyTigerGraph +RUN pip install pytigergraph + +# Expose the port Jupyter Notebook runs on +EXPOSE 8888 + +# Run Jupyter Notebook +CMD ["start-notebook.sh", "--NotebookApp.token=''", "--NotebookApp.password=''"]
diff --git a/pytigergraph/Script/readme.md b/pytigergraph/Script/readme.md new file mode 100644 index 0000000..88161ac --- /dev/null +++ b/pytigergraph/Script/readme.md @@ -0,0 +1,36 @@ +# Jupyter Notebook with pyTigerGraph + +This project provides a Docker setup for running a Jupyter Notebook server that includes the `pyTigerGraph` library for interacting with TigerGraph databases. + +## Prerequisites + +- Docker installed on your machine. Visit [Docker's official site](https://www.docker.com/products/docker-desktop) for installation instructions. + +## Getting Started + +### Pulling the Base Image + +Before building your Docker image, you need to pull the base image from Docker Hub: + + +docker pull jupyter/base-notebook + +Building the Docker Image +To build the Docker image, navigate to the directory containing your Dockerfile and run the following command: + + +docker build -t jupyter-pytigergraph . +Running the Docker Container +After the image has been built, you can start a container using: + +docker run -p 8888:8888 jupyter-pytigergraph +This command maps port 8888 of the container to port 8888 on your host. You can access the Jupyter Notebook by visiting http://localhost:8888 in your web browser. + +Usage +Once your Jupyter Notebook server is running, you can use it to develop and run Python code that interacts with TigerGraph databases using the pyTigerGraph library. + +Security +For production environments, it's recommended to secure your Jupyter Notebook server. You can set up a password or token by modifying the CMD line in the Dockerfile or configuring Jupyter Notebook settings directly. + +Support +For issues related to Docker setup or usage, refer to Docker documentation. For issues specific to pyTigerGraph, consult the pyTigerGraph GitHub repository.
diff --git a/pytigergraph/Script/requirements.txt b/pytigergraph/Script/requirements.txt new file mode 100644 index 0000000..f6fd755 --- /dev/null +++ b/pytigergraph/Script/requirements.txt @@ -0,0 +1 @@ +pyTigerGraph \ No newline at end of file
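As a closing usage sketch for the Jupyter/pyTigerGraph image above (host, graph name, and credentials are placeholder assumptions, not values from this repo):

```python
import pyTigerGraph as tg

# Placeholder connection details; adjust to your TigerGraph instance.
conn = tg.TigerGraphConnection(
    host="http://localhost",   # REST++ defaults to port 9000, GSQL to 14240
    graphname="my_graph",
    username="tigergraph",
    password="tigergraph",
)

# If REST++ authentication is enabled, create a secret and exchange it for a token.
conn.getToken(conn.createSecret())

# Simple smoke tests: list vertex types and run an ad hoc GSQL command.
print(conn.getVertexTypes())
print(conn.gsql("LS"))
```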