Deploying Trino with Helm and connecting it to Hive (Part 1)
Prerequisite: this article assumes the Hive components are already installed; after the Trino containers are deployed, they are connected to the existing Hive service.
Trino Helm chart: https://artifacthub.io/packages/helm/trino/trino
Build the Trino image
Preparation: download trino-server-416.tar.gz, zulu17.46.19-ca-jdk17.0.9-linux_x64.tar.gz, and trino-cli-416-executable.jar.
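If the packages are not already on hand, they can be fetched roughly as follows (the Trino artifacts are published to Maven Central; the Zulu JDK URL is assumed from Azul's CDN naming scheme, so verify it before use):
wget https://repo1.maven.org/maven2/io/trino/trino-server/416/trino-server-416.tar.gz
wget https://repo1.maven.org/maven2/io/trino/trino-cli/416/trino-cli-416-executable.jar
wget https://cdn.azul.com/zulu/bin/zulu17.46.19-ca-jdk17.0.9-linux_x64.tar.gz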
1. Create the Dockerfile
In later iterations the FROM line can be changed to harbor.dtstack.com:8443/dtstack/trino:416 to speed up rebuilds.
FROM registry.cn-hangzhou.aliyuncs.com/bigdata_cloudnative/centos:7.7.1908
#FROM harbor.dtstack.com:8443/dtstack/trino:416
RUN rm -f /etc/localtime && ln -sv /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone
# Use a UTF-8 locale
ENV LANG=en_US.UTF-8
# Create the admin user and group; the uid/gid must match securityContext.runAsUser/runAsGroup (1000:1000) in values.yaml
RUN groupadd --system --gid=1000 admin && useradd --system --home-dir /home/admin --uid=1000 --gid=admin admin
# Install sudo
RUN yum -y install sudo ; chmod 640 /etc/sudoers
# Grant admin passwordless sudo
RUN echo "admin ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
RUN yum -y install net-tools telnet wget nc
RUN mkdir /opt/apache/
# Add the JDK
ADD zulu17.46.19-ca-jdk17.0.9-linux_x64.tar.gz /opt/apache/
ENV JAVA_HOME=/opt/apache/zulu17.46.19-ca-jdk17.0.9-linux_x64
ENV PATH=$JAVA_HOME/bin:$PATH
# Add the Trino server
ENV TRINO_VERSION 416
ADD trino-server-${TRINO_VERSION}.tar.gz /opt/apache/
ENV TRINO_HOME=/opt/apache/trino-server-416
ENV PATH=$TRINO_HOME/bin:$PATH
# Create the config directory and the catalog directory
RUN mkdir -p ${TRINO_HOME}/etc/catalog
# Create directories for additional configuration files
RUN mkdir -p /opt/apache/trino-hiveconf
RUN mkdir -p /opt/apache/trino-resource
RUN mkdir -p /opt/apache/trino-ldap
# Add the Trino CLI
COPY trino-cli-416-executable.jar $TRINO_HOME/bin/trino-cli
RUN chmod +x $TRINO_HOME/bin/trino-cli
RUN chown -R admin:admin /opt/apache/
WORKDIR $TRINO_HOME
ENTRYPOINT $TRINO_HOME/bin/launcher run --verbose
2. Build the image
docker build -t harbor.dtstack.com:8443/dtstack/trino_test5:416 .
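A quick sanity check of the image before pushing is to run the bundled JDK, overriding the launcher entrypoint (a minimal check using the tag built above):
docker run --rm --entrypoint java harbor.dtstack.com:8443/dtstack/trino_test5:416 -version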
3. Push to the private registry
docker push harbor.dtstack.com:8443/dtstack/trino_test5:416
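The values.yaml below references an imagePullSecret named registry-credentials. If the Harbor registry requires authentication, create the namespace and the secret up front (the username and password here are placeholders):
kubectl create namespace trino-test
kubectl -n trino-test create secret docker-registry registry-credentials --docker-server=harbor.dtstack.com:8443 --docker-username=<harbor-user> --docker-password=<harbor-password>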
Configure the Helm chart
1. Add the Trino repo
helm repo add trino https://trinodb.github.io/charts/
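To get an editable local copy of the chart, pull and untar it (the target directory matches the path used by helm install later; a specific chart version can be pinned with --version):
helm repo update
helm pull trino/trino --untar --untardir /root/jiuyue/helm/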
2. Create a ConfigMap from the cluster's Hive configuration files

kubectl -n trino-test create cm hive-conf --from-file=hive_conf/
kubectl get cm -n trino-test
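The catalog configuration and volume mounts below expect the keys hdfs-site.xml, core-site.xml, and hive-site.xml, so confirm they were picked up:
kubectl -n trino-test describe cm hive-conf   # Data should list hdfs-site.xml, core-site.xml, hive-site.xml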

3. Modify values.yaml in the downloaded Trino Helm chart
# Default values for trino.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
## E.g.
# ## imagePullSecrets:
# ## - myRegistryKeySecretName
# ##
# imagePullSecrets: []
# storageClass: ""
#
image:
repository: harbor.dtstack.com:8443/dtstack/trino_test5
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart version.
tag: 416
#pullSecrets: []
#debug: false
imagePullSecrets:
- name: registry-credentials
server:
workers: 2
node:
environment: production
dataDir: /opt/apache/trino-server-416/data
pluginDir: /opt/apache/trino-server-416/plugin
log:
trino:
level: INFO
config:
path: /opt/apache/trino-server-416/etc
http:
port: 8080
https:
enabled: false
port: 8443
keystore:
path: ""
# Trino supports multiple authentication types: PASSWORD, CERTIFICATE, OAUTH2, JWT, KERBEROS
# For more info: https://trino.io/docs/current/security/authentication-types.html
authenticationType: ""
query:
maxMemory: "1GB"
maxMemoryPerNode: "512MB"
memory:
heapHeadroomPerNode: "512MB"
exchangeManager:
name: "filesystem"
baseDir: "/tmp/trino-local-file-system-exchange-manager"
workerExtraConfig: ""
coordinatorExtraConfig: ""
autoscaling:
enabled: false
maxReplicas: 5
targetCPUUtilizationPercentage: 50
accessControl: {}
# type: configmap
# refreshPeriod: 60s
# # Rules file is mounted to /etc/trino/access-control
# configFile: "rules.json"
# rules:
# rules.json: |-
# {
# "catalogs": [
# {
# "user": "admin",
# "catalog": "(mysql|system)",
# "allow": "all"
# },
# {
# "group": "finance|human_resources",
# "catalog": "postgres",
# "allow": true
# },
# {
# "catalog": "hive",
# "allow": "all"
# },
# {
# "user": "alice",
# "catalog": "postgresql",
# "allow": "read-only"
# },
# {
# "catalog": "system",
# "allow": "none"
# }
# ],
# "schemas": [
# {
# "user": "admin",
# "schema": ".*",
# "owner": true
# },
# {
# "user": "guest",
# "owner": false
# },
# {
# "catalog": "default",
# "schema": "default",
# "owner": true
# }
# ]
# }
additionalNodeProperties: {}
additionalConfigProperties: {}
additionalLogProperties: {}
additionalExchangeManagerProperties: {}
eventListenerProperties: {}
#additionalCatalogs: {}
additionalCatalogs:
mysql: |-
connector.name=mysql
connection-url=jdbc:mysql://10.68.102.107:3306
connection-user=root
connection-password=123456
hive: |-
connector.name=hive
hive.metastore.uri=thrift://hadoop001:9083
hive.config.resources=/opt/apache/trino-hiveconf/hdfs-site.xml,/opt/apache/trino-hiveconf/core-site.xml
# Array of EnvVar (https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#envvar-v1-core)
env: []
initContainers: {}
# coordinator:
# - name: init-coordinator
# image: busybox:1.28
# imagePullPolicy: IfNotPresent
# command: ['sh', '-c', "until nslookup myservice.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
# worker:
# - name: init-worker
# image: busybox:1.28
# command: ['sh', '-c', 'echo The worker is running! && sleep 3600']
securityContext:
runAsUser: 1000
runAsGroup: 1000
service:
type: NodePort
port: 8080
nodePort: 31880
nodeSelector: {}
tolerations: []
affinity: {}
auth: {}
# Set username and password
# https://trino.io/docs/current/security/password-file.html#file-format
# passwordAuth: "username:encrypted-password-with-htpasswd"
serviceAccount:
# Specifies whether a service account should be created
create: false
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
# Annotations to add to the service account
annotations: {}
secretMounts: []
coordinator:
jvm:
maxHeapSize: "2G"
gcMethod:
type: "UseG1GC"
g1:
heapRegionSize: "32M"
config:
memory:
heapHeadroomPerNode: ""
query:
maxMemoryPerNode: "512MB"
additionalJVMConfig: {}
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
livenessProbe: {}
# initialDelaySeconds: 20
# periodSeconds: 10
# timeoutSeconds: 5
# failureThreshold: 6
# successThreshold: 1
readinessProbe: {}
# initialDelaySeconds: 20
# periodSeconds: 10
# timeoutSeconds: 5
# failureThreshold: 6
# successThreshold: 1
additionalExposedPorts: {}
worker:
jvm:
maxHeapSize: "2G"
gcMethod:
type: "UseG1GC"
g1:
heapRegionSize: "32M"
config:
memory:
heapHeadroomPerNode: ""
query:
maxMemoryPerNode: "512MB"
additionalJVMConfig: {}
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
livenessProbe: {}
readinessProbe: {}
additionalExposedPorts: {}
#hive:
#mountPath: "/opt/apache"
#volumes:
#- name: trino-hive
# configMap:
# name: trino-hive
# items:
### - key: key-qijing-file.txt
#      path: key-qijing-file.txt
4. Mount the new ConfigMap
Modify deployment-coordinator.yaml and deployment-worker.yaml, adding the following (mind the indentation):
volumes:
  - name: hiveconf-volume
    configMap:
      name: hive-conf
      items:
        - key: hdfs-site.xml
          path: hdfs-site.xml
        - key: core-site.xml
          path: core-site.xml
        - key: hive-site.xml
          path: hive-site.xml

volumeMounts:
  - mountPath: /opt/apache/trino-hiveconf
    name: hiveconf-volume
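Before running the install in step 5, a dry render is a quick way to confirm that the hive catalog and the hiveconf-volume mount show up in the generated manifests (the grep pattern is only illustrative):
helm template trino /root/jiuyue/helm/trino/ -n trino-test | grep -B2 -A3 "trino-hiveconf"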
Full deployment-coordinator.yaml:
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "trino.coordinator" . }}
labels:
app: {{ template "trino.name" . }}
chart: {{ template "trino.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
component: coordinator
spec:
selector:
matchLabels:
app: {{ template "trino.name" . }}
release: {{ .Release.Name }}
component: coordinator
template:
metadata:
labels:
app: {{ template "trino.name" . }}
release: {{ .Release.Name }}
component: coordinator
spec:
serviceAccountName: {{ include "trino.serviceAccountName" . }}
{{- with .Values.securityContext }}
securityContext:
runAsUser: {{ .runAsUser }}
runAsGroup: {{ .runAsGroup }}
{{- end }}
volumes:
- name: hiveconf-volume
configMap:
name: hive-conf
items:
- key: hdfs-site.xml
path: hdfs-site.xml
- key: core-site.xml
path: core-site.xml
- key: hive-site.xml
path: hive-site.xml
- name: config-volume
configMap:
name: {{ template "trino.coordinator" . }}
- name: catalog-volume
configMap:
name: {{ template "trino.catalog" . }}
{{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }}
- name: access-control-volume
configMap:
name: trino-access-control-volume-coordinator
{{- end }}{{- end }}
{{- if eq .Values.server.config.authenticationType "PASSWORD" }}
- name: password-volume
secret:
secretName: trino-password-authentication
{{- end}}
{{- if .Values.initContainers.coordinator }}
initContainers:
{{- tpl (toYaml .Values.initContainers.coordinator) . | nindent 6 }}
{{- end }}
{{- range .Values.secretMounts }}
- name: {{ .name }}
secret:
secretName: {{ .secretName }}
{{- end }}
imagePullSecrets:
{{- toYaml .Values.imagePullSecrets | nindent 8 }}
containers:
- name: {{ .Chart.Name }}-coordinator
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
{{- toYaml .Values.env | nindent 12 }}
volumeMounts:
- mountPath: /opt/apache/trino-hiveconf
name: hiveconf-volume
- mountPath: {{ .Values.server.config.path }}
name: config-volume
- mountPath: {{ .Values.server.config.path }}/catalog
name: catalog-volume
{{- if .Values.accessControl }}{{- if eq .Values.accessControl.type "configmap" }}
- mountPath: {{ .Values.server.config.path }}/access-control
name: access-control-volume
{{- end }}{{- end }}
{{- range .Values.secretMounts }}
- name: {{ .name }}
mountPath: {{ .path }}
{{- end }}
{{- if eq .Values.server.config.authenticationType "PASSWORD" }}
- mountPath: {{ .Values.server.config.path }}/auth
name: password-volume
{{- end }}
ports:
- name: http
containerPort: {{ .Values.service.port }}
protocol: TCP
{{- range $key, $value := .Values.coordinator.additionalExposedPorts }}
- name: {{ $value.name }}
containerPort: {{ $value.port }}
protocol: {{ $value.protocol }}
{{- end }}
livenessProbe:
httpGet:
path: /v1/info
port: http
initialDelaySeconds: {{ .Values.coordinator.livenessProbe.initialDelaySeconds | default 20 }}
periodSeconds: {{ .Values.coordinator.livenessProbe.periodSeconds | default 10 }}
timeoutSeconds: {{ .Values.coordinator.livenessProbe.timeoutSeconds | default 5 }}
failureThreshold: {{ .Values.coordinator.livenessProbe.failureThreshold | default 6 }}
successThreshold: {{ .Values.coordinator.livenessProbe.successThreshold | default 1 }}
readinessProbe:
httpGet:
path: /v1/info
port: http
initialDelaySeconds: {{ .Values.coordinator.readinessProbe.initialDelaySeconds | default 20 }}
periodSeconds: {{ .Values.coordinator.readinessProbe.periodSeconds | default 10 }}
timeoutSeconds: {{ .Values.coordinator.readinessProbe.timeoutSeconds | default 5 }}
failureThreshold: {{ .Values.coordinator.readinessProbe.failureThreshold | default 6 }}
successThreshold: {{ .Values.coordinator.readinessProbe.successThreshold | default 1 }}
resources:
{{- toYaml .Values.coordinator.resources | nindent 12 }}
{{- with .Values.coordinator.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.coordinator.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.coordinator.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
Full deployment-worker.yaml:
{{- if gt (int .Values.server.workers) 0 }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "trino.worker" . }}
labels:
app: {{ template "trino.name" . }}
chart: {{ template "trino.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
component: worker
spec:
replicas: {{ .Values.server.workers }}
selector:
matchLabels:
app: {{ template "trino.name" . }}
release: {{ .Release.Name }}
component: worker
template:
metadata:
labels:
app: {{ template "trino.name" . }}
release: {{ .Release.Name }}
component: worker
spec:
serviceAccountName: {{ include "trino.serviceAccountName" . }}
volumes:
- name: hiveconf-volume
configMap:
name: hive-conf
items:
- key: hdfs-site.xml
path: hdfs-site.xml
- key: core-site.xml
path: core-site.xml
- key: hive-site.xml
path: hive-site.xml
- name: config-volume
configMap:
name: {{ template "trino.worker" . }}
- name: catalog-volume
configMap:
name: {{ template "trino.catalog" . }}
{{- if .Values.initContainers.worker }}
initContainers:
{{- tpl (toYaml .Values.initContainers.worker) . | nindent 6 }}
{{- end }}
imagePullSecrets:
{{- toYaml .Values.imagePullSecrets | nindent 8 }}
containers:
- name: {{ .Chart.Name }}-worker
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
{{- toYaml .Values.env | nindent 12 }}
volumeMounts:
- mountPath: /opt/apache/trino-hiveconf
name: hiveconf-volume
- mountPath: {{ .Values.server.config.path }}
name: config-volume
- mountPath: {{ .Values.server.config.path }}/catalog
name: catalog-volume
ports:
- name: http
containerPort: {{ .Values.service.port }}
protocol: TCP
{{- range $key, $value := .Values.worker.additionalExposedPorts }}
- name: {{ $value.name }}
containerPort: {{ $value.port }}
protocol: {{ $value.protocol }}
{{- end }}
livenessProbe:
httpGet:
path: /v1/info
port: http
initialDelaySeconds: {{ .Values.worker.livenessProbe.initialDelaySeconds | default 20 }}
periodSeconds: {{ .Values.worker.livenessProbe.periodSeconds | default 10 }}
timeoutSeconds: {{ .Values.worker.livenessProbe.timeoutSeconds | default 5 }}
failureThreshold: {{ .Values.worker.livenessProbe.failureThreshold | default 6 }}
successThreshold: {{ .Values.worker.livenessProbe.successThreshold | default 1 }}
readinessProbe:
httpGet:
path: /v1/info
port: http
initialDelaySeconds: {{ .Values.worker.readinessProbe.initialDelaySeconds | default 20 }}
periodSeconds: {{ .Values.worker.readinessProbe.periodSeconds | default 10 }}
timeoutSeconds: {{ .Values.worker.readinessProbe.timeoutSeconds | default 5 }}
failureThreshold: {{ .Values.worker.readinessProbe.failureThreshold | default 6 }}
successThreshold: {{ .Values.worker.readinessProbe.successThreshold | default 1 }}
resources:
{{- toYaml .Values.worker.resources | nindent 12 }}
{{- with .Values.worker.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.worker.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.worker.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
5. Install the chart
helm install trino /root/jiuyue/helm/trino/ -n trino-test
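To wait for the release to become ready (the deployment names are inferred from the pod names shown below; adjust if your release is named differently):
helm list -n trino-test
kubectl -n trino-test rollout status deploy/trino-coordinator
kubectl -n trino-test rollout status deploy/trino-worker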
6. Check the generated pods
kubectl get pod,svc -n trino-test -owide

Change the web UI port
kubectl -n trino-test edit service trino   # change the nodePort to 31880
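Alternatively, the nodePort can be patched non-interactively (the service name trino and the single-port index are assumptions; check kubectl get svc -n trino-test first):
kubectl -n trino-test patch svc trino --type=json -p='[{"op":"replace","path":"/spec/ports/0/nodePort","value":31880}]'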


# Open a shell inside the pod
kubectl exec -it pod/trino-coordinator-5877f89465-jsdgb -n trino-test -- /bin/bash

7. Smoke tests
Test the MySQL connection
./trino-cli-416-executable.jar --server http://172.16.121.114:31880 --user admin
trino> show schemas;
       Schema
--------------------
 dtstack
 information_schema
 metastore
 performance_schema
(4 rows)

Query 20231109_120504_00000_px53j, FINISHED, 3 nodes
Splits: 36 total, 36 done (100.00%)
1.51 [4 rows, 72B] [2 rows/s, 48B/s]

trino> use metastore;
trino:metastore> create schema trino_test;
CREATE SCHEMA
trino:metastore> create table trino_test.user(id int not null, username varchar(32) not null, password varchar(32) not null);
CREATE TABLE
trino:metastore> insert into trino_test.user values(1,'user1','pwd1');
INSERT: 1 row

Query 20231109_122925_00023_px53j, FINISHED, 3 nodes
Splits: 52 total, 52 done (100.00%)
1.70 [0 rows, 0B] [0 rows/s, 0B/s]

trino:metastore> insert into trino_test.user values(2,'user2','pwd2');
INSERT: 1 row

Query 20231109_122938_00024_px53j, FINISHED, 3 nodes
Splits: 52 total, 52 done (100.00%)
1.01 [0 rows, 0B] [0 rows/s, 0B/s]

trino:metastore> select * from trino_test.user;
 id | username | password
----+----------+----------
  1 | user1    | pwd1
  2 | user2    | pwd2
(2 rows)

Query 20231109_122944_00025_px53j, FINISHED, 1 node
Splits: 1 total, 1 done (100.00%)
0.29 [2 rows, 0B] [6 rows/s, 0B/s]
Test the Hive connection
[root@dm01 trino]# ./trino-cli-416-executable.jar --server http://172.16.121.114:31880 --user admin --catalog=hive
trino> use test;
USE
trino:test> select * from hive_student;
 s_no | s_name | s_sex | s_birth | s_class
------+--------+-------+---------+---------
(0 rows)

Query 20231110_144640_00002_p2fru, FINISHED, 1 node
Splits: 1 total, 1 done (100.00%)
2.06 [0 rows, 0B] [0 rows/s, 0B/s]

trino:test> show schemas;
       Schema
--------------------
 default
 information_schema
 test
(3 rows)

Query 20231110_144730_00004_p2fru, FINISHED, 3 nodes
Splits: 36 total, 36 done (100.00%)
0.36 [3 rows, 44B] [8 rows/s, 123B/s]

trino:test> use default;
USE
trino:default> show tables;
 Table
-------
 test
(1 row)

Query 20231110_144739_00008_p2fru, FINISHED, 3 nodes
Splits: 36 total, 36 done (100.00%)
0.66 [1 rows, 21B] [1 rows/s, 32B/s]

trino:default> select * from test;
 id
----
(0 rows)

Query 20231110_144751_00009_p2fru, FINISHED, 1 node
Splits: 1 total, 1 done (100.00%)
0.12 [0 rows, 0B] [0 rows/s, 0B/s]
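To also verify that writes through the Hive connector work (a sketch run from the same CLI session; it assumes the metastore's default warehouse directory on HDFS is writable by the Trino pods):
trino> create schema if not exists hive.trino_smoke;
trino> create table hive.trino_smoke.t1 (id int, name varchar) with (format = 'ORC');
trino> insert into hive.trino_smoke.t1 values (1, 'user1'), (2, 'user2');
trino> select * from hive.trino_smoke.t1;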
Test image export and import
View the Trino image
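The locally built image can be listed with:
docker images | grep trino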

Export the image
# docker save 0a246c654259 > /root/jiuyue/image/trino/trino:416.tar
docker save harbor.dtstack.com:8443/dtstack/trino_test10:416 > trino416.tar


Import the image
docker load < trino416.tar
