我最近在Ubuntu服务器上部署了Airflow,以官方的Airflow Docker镜像和docker-compose文件为基础。
一开始,在我的本地Windows 10机器和服务器上,一切都运行得很好。但几天后,服务器上所有的DAG/任务都失败了,日志中出现了以下错误消息:
[2021-05-05 09:24:51,274] {taskinstance.py:1063} INFO - Executing <Task(PythonOperator): extract_events> on 2021-05-05T08:00:00+00:00
[2021-05-05 09:24:51,274] {base_task_runner.py:133} INFO - Running on host: 206851aec3f2
[2021-05-05 09:24:51,274] {base_task_runner.py:134} INFO - Running: ['sudo', '-E', '-H', '-u', 'airflow', 'airflow', 'tasks', 'run', 'events_pipeline', 'extract_events', '2021-05-05T08:00:00+00:00', '--job-id', '3', '--pool', 'default_pool', '--raw', '--subdir', 'DAGS_FOLDER/sro/events_dag.py', '--cfg-path', '/tmp/tmpvzvt2zyj', '--error-file', '/tmp/tmp8dbgrtf6']
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events We trust you have received the usual lecture from the local System
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events Administrator. It usually boils down to these three things:
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events #1) Respect the privacy of others.
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events #2) Think before you type.
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events #3) With great power comes great responsibility.
[2021-05-05 09:24:51,287] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events
[2021-05-05 09:24:51,288] {base_task_runner.py:118} INFO - Job 3: Subtask extract_events sudo: no tty present and no askpass program specified
[2021-05-05 09:24:51,288] {local_task_job.py:146} INFO - Task exited with return code 1
我不明白的是,为什么在服务器上的容器中运行任务时会使用sudo,而我在本地运行它进行开发时却不会。我在本地和服务器上使用完全相同的Dockerfile和docker-compose文件。Dockerfile如下所示:
# Custom Airflow image: the official apache/airflow 2.0.1 image plus the
# Microsoft ODBC driver packages and Airflow's "odbc" extra.
FROM apache/airflow:2.0.1

# apt operations below need root; the image is switched back to the
# unprivileged "airflow" user before the end of the file.
USER root

# Download microsoft drivers for odbc connection
# NOTE(review): ACCEPT_EULA=Y is presumably read by the msodbcsql17 package
# installer to accept the license non-interactively -- confirm.
ARG ACCEPT_EULA=Y
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list

# A multi-line RUN must end each continued line with a backslash; without it
# the following lines are parsed as separate (invalid) Dockerfile instructions.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential unixodbc-dev msodbcsql17

USER airflow

# Download the apache-airflow plugin for odbc connections
# The requirement is quoted so the shell never treats "[odbc]" as a glob.
RUN pip install --no-cache-dir --user 'apache-airflow[odbc]'

USER root

# Remove packages that are no longer needed and drop the apt caches.
RUN apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Run the container as the unprivileged airflow user.
USER airflow
docker-compose文件:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Basic Airflow configuration for LocalExecutor with PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic configuration using environment variables or an .env file
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow.
# Default: apache/airflow-odbc:2.0.1
# AIRFLOW_UID - User ID in Airflow containers
# Default: 50000
# AIRFLOW_GID - Group ID in Airflow containers
# Default: 50000
# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account.
# Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account.
# Default: airflow
#
# Feel free to modify this file to suit your needs.
---
version: '3'

# Settings shared by the Airflow services below; each service pulls them in
# with the merge key `<<: *airflow-common`.
x-airflow-common:
  &airflow-common
  # This is the image created by running the docker file.
  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow-odbc:2.0.1}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
  volumes:
    # The host-side airflow.cfg is bind-mounted into the container, so whatever
    # it contains (e.g. a default_impersonation entry) is what the containers see.
    - ./config/airflow.cfg:/opt/airflow/airflow.cfg
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  # Airflow services wait until the postgres healthcheck passes.
  depends_on:
    postgres:
      condition: service_healthy

services:
  # Metadata database referenced by AIRFLOW__CORE__SQL_ALCHEMY_CONN above.
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      # Quoted so the mapping is always read as a string, never as a number.
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    restart: always

  # One-shot service: runs `version` with the _AIRFLOW_* variables set.
  # NOTE(review): presumably the image entrypoint uses these variables to
  # upgrade the DB and create the admin user -- confirm against the image docs.
  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      # Merge keys are shallow, so the shared environment is re-merged here
      # explicitly before adding the init-only variables.
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

# Named volume backing the postgres data directory.
volumes:
  postgres-db-volume:
我在这里错过了什么?
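我发现问题是由我的Airflow配置文件(airflow.cfg)中的以下条目引起的:default_impersonation = airflow。设置该选项后,Airflow会通过 sudo -u airflow 以该用户身份运行任务(正如日志中的 'sudo', '-E', '-H', '-u', 'airflow' 所示),而容器内的sudo无法交互式地输入密码,因此任务失败。删除该条目后,问题消失了。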