Kafka to Hive connection problem using Docker



We are currently working on a project in which we have to use Docker to store metadata from Kafka topics in Hive.

We have worked through various variations of the docker-compose file, but we still have problems connecting the Kafka topics to Hive.

After further investigation we found that we need Hive version 3, but we do not know how to put the docker-compose file together.


version: "3"
services:
namenode:
image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
container_name: namenode
volumes:
- namenode:/hadoop/dfs/name
- ./infra/zeppelin/examples:/opt/sansa-examples
environment:
- CLUSTER_NAME=test
env_file:
- ./infra/hadoop/hadoop-hive.env
ports:
- "50070:50070"
- "8020:8020"
- "8081:8081"
datanode:
image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
container_name: datanode
volumes:
- datanode:/hadoop/dfs/data
env_file:
- ./infra/hadoop/hadoop-hive.env
links:
- namenode
spark-master:
image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
container_name: spark-master
ports:
- "8090:800"
- "7077:7077"
environment:
- CORE_CONF_fs_defaultFS=hdfs://namenode:8020
- SPARK_PUBLIC_DNS=localhost
depends_on:
- namenode
- datanode
links:
- namenode
- datanode
spark-worker:
image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
container_name: spark-worker
ports:
- "8083:8083"
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - SPARK_PUBLIC_DNS=localhost
    links:
      - spark-master
  hue:
    image: bde2020/hdfs-filebrowser:3.11
    container_name: hue
    ports:
      - 8088:8088
    environment:
      - NAMENODE_HOST=namenode
      - SPARK_MASTER=spark://spark-master:7077
    links:
      - spark-master
  zeppelin:
    image: bde2020/zeppelin:0.0.1-zeppelin-0.7.1-hadoop-2.8.0-spark-2.1.0
    container_name: zeppelin
    ports:
      - 8080:8080
    volumes:
      - ./data:/data
      - ./data:/opt/zeppelin/data
      #- ./infra/zeppelin/conf:/opt/zeppelin/conf
      - ./infra/zeppelin/logs:/opt/zeppelin/logs
      - ./infra/zeppelin/notebooks:/opt/zeppelin/notebook
      - ./infra/zeppelin/examples:/opt/sansa-examples
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      SPARK_MASTER: "spark://spark-master:7077"
      MASTER: "spark://spark-master:7077"
      SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark.jar --conf spark.serializer=org.apache.spark.serializer.KryoSerializer"
    links:
      - spark-master
  hive-server:
    image: bde2020/hive
    container_name: hive-server
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    environment:
      - "HIVE_CORE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore/metastore"
    links:
      - namenode
      - hive-metastore
    ports:
      - 10000:10000
  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql
    container_name: hive-metastore-postgresql
  hive-metastore:
    image: bde2020/hive
    container_name: hive-metastore
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    links:
      - namenode
      - hive-metastore-postgresql
    command: /opt/hive/bin/hive --service metastore
    ports:
      - 9083:9083
  zookeeper:
    image: confluentinc/cp-zookeeper
    container_name: zookeeper
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
    volumes:
      - zookeeper:/var/lib/zookeeper
  kafka:
    image: wurstmeister/kafka
    container_name: kafka
    ports:
      - "9092:9092"
    environment:
      KAFKA_ADVERTISED_HOST_NAME: localhost
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    links:
      - zookeeper
    depends_on:
      - zookeeper

#  kafka:
#    image: confluentinc/cp-kafka
#    container_name: kafka
#    ports:
#      - 9092:9092
#    environment:
#      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
#      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
#      KAFKA_NUM_PARTITIONS: 1
#      KAFKA_DEFAULT_REPLICATION_FACTOR: 1
#      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
#      KAFKA_DELETE_TOPIC_ENABLE: "true"
#    volumes:
#      - kafka:/var/lib/kafka
#    links:
#      - zookeeper
#    depends_on:
#      - zookeeper
  nifi:
    image: xemuliam/nifi
    container_name: nifi
    ports:
      - 5080:5080
      - 5443:8443
      - 5081:5081
      ## for scaling we have to do this
      #- 8080
    links:
      - zookeeper
      - kafka
    depends_on:
      - zookeeper
      - kafka
    volumes:
      - ./infra/nifi/conf:/opt/nifi/conf
      - ./infra/nifi/logs:/opt/nifi/logs
      - ./data:/opt/datafiles
      - nifi:/opt/nifi/flowfile_repository
      - nifi:/opt/nifi/database_repository
      - nifi:/opt/nifi/content_repository
      - nifi:/opt/nifi/provenance_repository
    environment:
      ZK_NODES_LIST: zookeeper
      IS_CLUSTER_NODE: 1
      ELECTION_TIME: 1 min

volumes:
  namenode:
  datanode:
  zookeeper:
  kafka:
  nifi:

If you need to switch to a different version of Hive, you need to replace image: bde2020/hive with an image of the version you want.
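
A rough sketch of what that swap could look like is below. The apache/hive:3.1.3 image and its SERVICE_NAME convention are assumptions here, and wiring it to the existing Postgres-backed metastore and the hadoop-hive.env settings is not shown.

  hive-metastore:
    # assumption: any Hive 3 image works here; bde2020/hive is a Hive 2.x image
    image: apache/hive:3.1.3
    environment:
      SERVICE_NAME: metastore
    ports:
      - 9083:9083
  hive-server:
    image: apache/hive:3.1.3
    environment:
      SERVICE_NAME: hiveserver2
    ports:
      - 10000:10000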

Alternatively, you could use Spark Structured Streaming or NiFi to consume from Kafka and write to HDFS instead of using the Hive Kafka storage handler. Then you would not need any specific version of Hive.
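
As a rough sketch of the Spark route (Scala), the job below reads a Kafka topic and appends Parquet files to HDFS. The topic name and output paths are placeholders, and the spark-sql-kafka-0-10 package matching the Spark build above has to be on the classpath.

import org.apache.spark.sql.SparkSession

object KafkaToHdfs {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("kafka-to-hdfs")
      .getOrCreate()

    // "kafka:9092" matches the broker service name in the compose file; "my-topic" is made up.
    val records = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "kafka:9092")
      .option("subscribe", "my-topic")
      .load()
      .selectExpr("CAST(key AS STRING) AS key", "CAST(value AS STRING) AS value")

    // Continuously append Parquet to HDFS; paths are illustrative.
    records.writeStream
      .format("parquet")
      .option("path", "hdfs://namenode:8020/data/my-topic")
      .option("checkpointLocation", "hdfs://namenode:8020/checkpoints/my-topic")
      .start()
      .awaitTermination()
  }
}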

You can also use the Kafka Connect HDFS Sink connector, which has Hive integration as well.
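
A hedged sketch of such a connector configuration for the Confluent HDFS Sink (property names from the kafka-connect-hdfs connector; the topic name is made up and the values are adapted to the service names above):

# hypothetical connector config for the Confluent HDFS Sink
name=hdfs-sink
connector.class=io.confluent.connect.hdfs.HdfsSinkConnector
tasks.max=1
topics=my-topic
hdfs.url=hdfs://namenode:8020
flush.size=1000
# enable Hive integration so tables are created/updated in the metastore defined above
hive.integration=true
hive.metastore.uris=thrift://hive-metastore:9083
hive.database=default
schema.compatibility=BACKWARD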

The Hive server itself does not "store" anything.

Also, the Confluent container is defined correctly so that it can be reached from other containers, whereas the wurstmeister container (as configured above, advertising localhost) is not.
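
A possible fix for the wurstmeister service, assuming the usual two-listener pattern, is sketched below; containers on the compose network would then reach the broker at kafka:29092, while the host keeps using localhost:9092.

  kafka:
    image: wurstmeister/kafka
    container_name: kafka
    ports:
      - "9092:9092"
    environment:
      # INSIDE is for other containers (NiFi, Spark); OUTSIDE is for the host
      KAFKA_LISTENERS: INSIDE://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092
      KAFKA_ADVERTISED_LISTENERS: INSIDE://kafka:29092,OUTSIDE://localhost:9092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    depends_on:
      - zookeeper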
