Terraform dial tcp DNS error when creating an AWS ALB ingress with an EKS cluster



I am trying to create an AWS EKS cluster with an ALB load balancer and a Kubernetes ingress using Terraform.

I have been using this git repository and this blog post to guide me.

After the cluster is created, the deployment fails immediately with the following errors.

Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/configmaps": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host
on modules/alb/alb_ingress_controller.tf line 1, in resource "kubernetes_config_map" "aws_auth":
1: resource "kubernetes_config_map" "aws_auth" {

Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/apis/rbac.authorization.k8s.io/v1/clusterroles": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host
on modules/alb/alb_ingress_controller.tf line 20, in resource "kubernetes_cluster_role" "alb-ingress":
20: resource "kubernetes_cluster_role" "alb-ingress" {

Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/apis/rbac.authorization.k8s.io/v1/clusterrolebindings": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host
on modules/alb/alb_ingress_controller.tf line 41, in resource "kubernetes_cluster_role_binding" "alb-ingress":
41: resource "kubernetes_cluster_role_binding" "alb-ingress" {

Error: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/serviceaccounts": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host
on modules/alb/alb_ingress_controller.tf line 62, in resource "kubernetes_service_account" "alb-ingress":
62: resource "kubernetes_service_account" "alb-ingress" {

Error: Failed to create Ingress 'default/main-ingress' because: Post "https://E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com/apis/extensions/v1beta1/namespaces/default/ingresses": dial tcp: lookup E8475B1B3693C979073BF0D721D876A7.sk1.ap-southeast-1.eks.amazonaws.com on 8.8.8.8:53: no such host
on modules/alb/kubernetes_ingress.tf line 1, in resource "kubernetes_ingress" "main":
1: resource "kubernetes_ingress" "main" {

Error: Post "https://641480DEC80EB445C6CBBEDC9D1F0234.yl4.ap-southeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/configmaps": dial tcp 10.0.21.192:443: connect: no route to host
on modules/eks/allow_nodes.tf line 22, in resource "kubernetes_config_map" "aws_auth":
22: resource "kubernetes_config_map" "aws_auth" {

Here is my Terraform code:

provider "aws" {
region  = var.aws_region
version = "~> 2.65.0"
ignore_tags { 
keys = ["kubernetes.io/role/internal-elb", "app.kubernetes.io/name"]
key_prefixes = ["kubernetes.io/cluster/", "alb.ingress.kubernetes.io/"]
}
}
resource "kubernetes_config_map" "aws_auth" {
metadata {
name = "aws-auth"
namespace = "kube-system"
}
data = {
mapRoles = <<EOF
- rolearn: ${var.iam_role_node}
  username: system:node:{{EC2PrivateDNSName}}
  groups:
    - system:bootstrappers
    - system:nodes
EOF
}
depends_on = [
var.eks_cluster_name
]
}
resource "kubernetes_cluster_role" "alb-ingress" {
metadata {
name = "alb-ingress-controller"
labels = {
"app.kubernetes.io/name" = "alb-ingress-controller"
}
}
rule {
api_groups = ["", "extensions"]
resources  = ["configmaps", "endpoints", "events", "ingresses", "ingresses/status", "services"]
verbs      = ["create", "get", "list", "update", "watch", "patch"]
}
rule {
api_groups = ["", "extensions"]
resources  = ["nodes", "pods", "secrets", "services", "namespaces"]
verbs      = ["get", "list", "watch"]
}
}
resource "kubernetes_cluster_role_binding" "alb-ingress" {
metadata {
name = "alb-ingress-controller"
labels = {
"app.kubernetes.io/name" = "alb-ingress-controller"
}
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind      = "ClusterRole"
name      = "alb-ingress-controller"
}
subject {
kind      = "ServiceAccount"
name      = "alb-ingress-controller"
namespace = "kube-system"
}
}
resource "kubernetes_service_account" "alb-ingress" {
metadata {
name = "alb-ingress-controller"
namespace = "kube-system"
labels = {
"app.kubernetes.io/name" = "alb-ingress-controller"
}
}
automount_service_account_token = true
}
resource "kubernetes_deployment" "alb-ingress" {
metadata {
name = "alb-ingress-controller"
labels = {
"app.kubernetes.io/name" = "alb-ingress-controller"
}
namespace = "kube-system"
}
spec {
selector {
match_labels = {
"app.kubernetes.io/name" = "alb-ingress-controller"
}
}
template {
metadata {
labels = {
"app.kubernetes.io/name" = "alb-ingress-controller"
}
}
spec {
volume {
name = kubernetes_service_account.alb-ingress.default_secret_name
secret {
secret_name = kubernetes_service_account.alb-ingress.default_secret_name
}
}
container {
# This is where you change the version when Amazon comes out with a new version of the ingress controller
image = "docker.io/amazon/aws-alb-ingress-controller:v1.1.7"
name  = "alb-ingress-controller"
args = [
"--ingress-class=alb",
"--cluster-name=${var.eks_cluster_name}",
"--aws-vpc-id=${var.vpc_id}",
"--aws-region=${var.aws_region}"]
}
service_account_name = "alb-ingress-controller"
}
}
}
}
########################################################################################
# setup provider for kubernetes
//data "external" "aws_iam_authenticator" {
//  program = ["sh", "-c", "aws-iam-authenticator token -i ${var.cluster_name} | jq -r -c .status"]
//}
data "aws_eks_cluster_auth" "tf_eks_cluster" {
name = aws_eks_cluster.tf_eks_cluster.name
}
provider "kubernetes" {
host                      = aws_eks_cluster.tf_eks_cluster.endpoint
cluster_ca_certificate    = base64decode(aws_eks_cluster.tf_eks_cluster.certificate_authority.0.data)
//token                   = data.external.aws_iam_authenticator.result.token
token                     = data.aws_eks_cluster_auth.tf_eks_cluster.token
load_config_file          = false
version = "~> 1.9"
}
# Allow worker nodes to join cluster via config map
resource "kubernetes_config_map" "aws_auth" {
metadata {
name = "aws-auth"
namespace = "kube-system"
}
data = {
mapRoles = <<EOF
- rolearn: ${aws_iam_role.tf-eks-node.arn}
  username: system:node:{{EC2PrivateDNSName}}
  groups:
    - system:bootstrappers
    - system:nodes
EOF
}
depends_on = [aws_eks_cluster.tf_eks_cluster, aws_autoscaling_group.tf_eks_cluster] 
}

resource "kubernetes_ingress" "main" {
metadata {
name = "main-ingress"
annotations = {
"alb.ingress.kubernetes.io/scheme" = "internet-facing"
"kubernetes.io/ingress.class" = "alb"
"alb.ingress.kubernetes.io/subnets" = var.app_subnet_stringlist
"alb.ingress.kubernetes.io/certificate-arn" = "${data.aws_acm_certificate.api.arn}, ${data.aws_acm_certificate.gitea.arn}"
"alb.ingress.kubernetes.io/listen-ports" = <<JSON
[
{"HTTP": 80},
{"HTTPS": 443}
]
JSON
"alb.ingress.kubernetes.io/actions.ssl-redirect" = <<JSON
{
"Type": "redirect",
"RedirectConfig": {
"Protocol": "HTTPS",
"Port": "443",
"StatusCode": "HTTP_301"
}
}
JSON
}
}
spec {
rule {
host = "api.xactpos.com"
http {
path {
backend {
service_name = "ssl-redirect"
service_port = "use-annotation"
}
path = "/*"
}
path {
backend {
service_name = "app-service1"
service_port = 80
}
path = "/service1"
}
path {
backend {
service_name = "app-service2"
service_port = 80
}
path = "/service2"
}
}
}
rule {
host = "gitea.xactpos.com"
http {
path {
backend {
service_name = "ssl-redirect"
service_port = "use-annotation"
}
path = "/*"
}
path {
backend {
service_name = "api-service1"
service_port = 80
}
path = "/service3"
}
path {
backend {
service_name = "api-service2"
service_port = 80
}
path = "/graphq4"
}
}
}
}
}
resource "aws_security_group" "eks-alb" {
name        = "eks-alb-public"
description = "Security group allowing public traffic for the eks load balancer."
vpc_id      = var.vpc_id
egress {
from_port   = 0
to_port     = 0
protocol    = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = map(
"Name", "terraform-eks-alb",
"kubernetes.io/cluster/tf-eks-cluster", "owned"
)
}
resource "aws_security_group_rule" "eks-alb-public-https" {
description       = "Allow eks load balancer to communicate with public traffic securely."
cidr_blocks       = ["0.0.0.0/0"]
from_port         = 443
protocol          = "tcp"
security_group_id = aws_security_group.eks-alb.id
to_port           = 443
type              = "ingress"
}
resource "aws_security_group_rule" "eks-alb-public-http" {
description       = "Allow eks load balancer to communicate with public traffic."
cidr_blocks       = ["0.0.0.0/0"]
from_port         = 80
protocol          = "tcp"
security_group_id = aws_security_group.eks-alb.id
to_port           = 80
type              = "ingress"
}
resource "aws_eks_cluster" "tf_eks_cluster" {
name            = var.cluster_name
role_arn        = aws_iam_role.tf-eks-cluster.arn
vpc_config {
security_group_ids      = [aws_security_group.tf-eks-cluster.id]
subnet_ids              = var.app_subnet_ids
endpoint_private_access = true
endpoint_public_access  = false
}
depends_on = [
aws_iam_role_policy_attachment.tf-eks-cluster-AmazonEKSClusterPolicy,
aws_iam_role_policy_attachment.tf-eks-cluster-AmazonEKSServicePolicy,
]
}
# Setup for IAM role needed to setup an EKS cluster
resource "aws_iam_role" "tf-eks-cluster" {
name = "tf-eks-cluster"
assume_role_policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "eks.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
POLICY
}
resource "aws_iam_role_policy_attachment" "tf-eks-cluster-AmazonEKSClusterPolicy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
role       = aws_iam_role.tf-eks-cluster.name
}
resource "aws_iam_role_policy_attachment" "tf-eks-cluster-AmazonEKSServicePolicy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSServicePolicy"
role       = aws_iam_role.tf-eks-cluster.name
}
########################################################################################
# Setup IAM role & instance profile for worker nodes
resource "aws_iam_role" "tf-eks-node" {
name = "tf-eks-node"
assume_role_policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
POLICY
}
resource "aws_iam_instance_profile" "tf-eks-node" {
name = "tf-eks-node"
role = aws_iam_role.tf-eks-node.name
}
resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEKSWorkerNodePolicy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
role       = aws_iam_role.tf-eks-node.name
}
resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEKS_CNI_Policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
role       = aws_iam_role.tf-eks-node.name
}
resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEC2ContainerRegistryReadOnly" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
role       = aws_iam_role.tf-eks-node.name
}
resource "aws_iam_role_policy_attachment" "tf-eks-node-AmazonEC2FullAccess" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2FullAccess"
role       = aws_iam_role.tf-eks-node.name
}
resource "aws_iam_role_policy_attachment" "tf-eks-node-alb-ingress_policy" {
policy_arn = aws_iam_policy.alb-ingress.arn
role       = aws_iam_role.tf-eks-node.name
}
resource "aws_iam_policy" "alb-ingress" {    
name   = "alb-ingress-policy"
policy = file("${path.module}/alb_ingress_policy.json")
}
# generate KUBECONFIG as output to save in ~/.kube/config locally
# save the 'terraform output eks_kubeconfig > config', run 'mv config ~/.kube/config' to use it for kubectl
locals {
kubeconfig = <<KUBECONFIG

apiVersion: v1
clusters:
- cluster:
    server: ${aws_eks_cluster.tf_eks_cluster.endpoint}
    certificate-authority-data: ${aws_eks_cluster.tf_eks_cluster.certificate_authority.0.data}
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: aws
  name: aws
current-context: aws
kind: Config
preferences: {}
users:
- name: aws
  user:
    exec:
      apiVersion: client.authentication.k8s.io/v1alpha1
      command: aws-iam-authenticator
      args:
        - "token"
        - "-i"
        - "${var.cluster_name}"
KUBECONFIG
}
########################################################################################
# Setup AutoScaling Group for worker nodes
# Setup data source to get amazon-provided AMI for EKS nodes
data "aws_ami" "eks-worker" {
filter {
name   = "name"
values = ["amazon-eks-node-v*"]
}
most_recent = true
owners      = ["602401143452"] # Amazon EKS AMI Account ID
}
# Is provided in demo code, no idea what it's used for though! TODO: DELETE
# data "aws_region" "current" {}
# EKS currently documents this required userdata for EKS worker nodes to
# properly configure Kubernetes applications on the EC2 instance.
# We utilize a Terraform local here to simplify Base64 encode this
# information and write it into the AutoScaling Launch Configuration.
# More information: https://docs.aws.amazon.com/eks/latest/userguide/launch-workers.html
locals {
tf-eks-node-userdata = <<USERDATA
#!/bin/bash
set -o xtrace
/etc/eks/bootstrap.sh --apiserver-endpoint '${aws_eks_cluster.tf_eks_cluster.endpoint}' --b64-cluster-ca '${aws_eks_cluster.tf_eks_cluster.certificate_authority.0.data}' '${var.cluster_name}'
USERDATA
}

resource "aws_launch_configuration" "tf_eks_cluster" {
associate_public_ip_address = true
iam_instance_profile        = aws_iam_instance_profile.tf-eks-node.name
image_id                    = data.aws_ami.eks-worker.id
instance_type               = var.instance_type
name_prefix                 = "tf-eks-spot"
security_groups             = [aws_security_group.tf-eks-node.id]
user_data_base64            = base64encode(local.tf-eks-node-userdata)
lifecycle {
create_before_destroy = true
}
}
resource "aws_lb_target_group" "tf_eks_cluster" {
name = "tf-eks-cluster"
port = 31742
protocol = "HTTP"
vpc_id = var.vpc_id
target_type = "instance"
}
resource "aws_autoscaling_group" "tf_eks_cluster" {
desired_capacity     = "2"
launch_configuration = aws_launch_configuration.tf_eks_cluster.id
max_size             = "3"
min_size             = 1
name                 = "tf-eks-cluster"
vpc_zone_identifier  = var.app_subnet_ids
target_group_arns    = [aws_lb_target_group.tf_eks_cluster.arn]
tag {
key                 = "Name"
value               = "tf-eks-cluster"
propagate_at_launch = true
}
tag {
key                 = "kubernetes.io/cluster/${var.cluster_name}"
value               = "owned"
propagate_at_launch = true
}
}
resource "aws_security_group" "tf-eks-cluster" {
name        = "terraform-eks-cluster"
description = "Cluster communication with worker nodes"
vpc_id      = var.vpc_id
egress {
from_port   = 0
to_port     = 0
protocol    = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "terraform-eks"
}
}
resource "aws_security_group" "tf-eks-node" {
name        = "terraform-eks-node"
description = "Security group for all nodes in the cluster"
vpc_id      = var.vpc_id
egress {
from_port   = 0
to_port     = 0
protocol    = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "terraform-eks"
}
}
# Allow inbound traffic from your local workstation external IP
# to the Kubernetes. You will need to replace A.B.C.D below with
# your real IP. Services like icanhazip.com can help you find this.
resource "aws_security_group_rule" "tf-eks-cluster-ingress-workstation-https" {
cidr_blocks       = [var.accessing_computer_ip]
description       = "Allow workstation to communicate with the cluster API Server"
from_port         = 443
protocol          = "tcp"
security_group_id = aws_security_group.tf-eks-cluster.id
to_port           = 443
type              = "ingress"
}
########################################################################################
# Setup worker node security group
resource "aws_security_group_rule" "tf-eks-node-ingress-self" {
description              = "Allow node to communicate with each other"
from_port                = 0
protocol                 = "-1"
security_group_id        = aws_security_group.tf-eks-node.id
source_security_group_id = aws_security_group.tf-eks-node.id
to_port                  = 65535
type                     = "ingress"
}
resource "aws_security_group_rule" "tf-eks-node-ingress-cluster" {
description              = "Allow worker Kubelets and pods to receive communication from the cluster control plane"
from_port                = 1025
protocol                 = "tcp"
security_group_id        = aws_security_group.tf-eks-node.id
source_security_group_id = aws_security_group.tf-eks-cluster.id
to_port                  = 65535
type                     = "ingress"
}
# allow worker nodes to access EKS master
resource "aws_security_group_rule" "tf-eks-cluster-ingress-node-https" {
description              = "Allow pods to communicate with the cluster API Server"
from_port                = 443
protocol                 = "tcp"
security_group_id        = aws_security_group.tf-eks-node.id
source_security_group_id = aws_security_group.tf-eks-cluster.id
to_port                  = 443
type                     = "ingress"
}
resource "aws_security_group_rule" "tf-eks-node-ingress-master" {
description              = "Allow cluster control to receive communication from the worker Kubelets"
from_port                = 443
protocol                 = "tcp"
security_group_id        = aws_security_group.tf-eks-cluster.id
source_security_group_id = aws_security_group.tf-eks-node.id
to_port                  = 443
type                     = "ingress"
}
resource "aws_internet_gateway" "eks" {
vpc_id = aws_vpc.eks.id
tags = {
Name = "internet_gateway"
}
}

resource "aws_eip" "nat_gateway" {
count = var.subnet_count
vpc   = true
}
resource "aws_nat_gateway" "eks" {
count = var.subnet_count
allocation_id = aws_eip.nat_gateway.*.id[count.index]
subnet_id = aws_subnet.gateway.*.id[count.index]
tags = {
Name = "nat_gateway"
}
depends_on = [aws_internet_gateway.eks]
}
resource "aws_route_table" "application" {
count = var.subnet_count
vpc_id = aws_vpc.eks.id
route {
cidr_block = "0.0.0.0/0"
nat_gateway_id = aws_nat_gateway.eks.*.id[count.index]
}
tags = {
Name = "eks_application"
}
}
resource "aws_route_table" "vpn" {
vpc_id = aws_vpc.eks.id
tags = {
Name = "eks_vpn"
}
}
resource "aws_route_table" "gateway" {
vpc_id = aws_vpc.eks.id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.eks.id
}
tags = {
Name = "eks_gateway"
}
}
resource "aws_route_table_association" "application" {
count = var.subnet_count
subnet_id      = aws_subnet.application.*.id[count.index]
route_table_id = aws_route_table.application.*.id[count.index]
}
resource "aws_route_table_association" "vpn" {
count = var.subnet_count
subnet_id      = aws_subnet.vpn.*.id[count.index]
route_table_id = aws_route_table.vpn.id
}
resource "aws_route_table_association" "gateway" {
count = var.subnet_count
subnet_id      = aws_subnet.gateway.*.id[count.index]
route_table_id = aws_route_table.gateway.id
}
data "aws_availability_zones" "available" {}
resource "aws_subnet" "gateway" {
count = var.subnet_count
availability_zone = data.aws_availability_zones.available.names[count.index]
cidr_block        = "10.0.1${count.index}.0/24"
vpc_id            = aws_vpc.eks.id
map_public_ip_on_launch = true
tags = {
Name = "eks_gateway"
}  
}
resource "aws_subnet" "application" {
count = var.subnet_count
availability_zone = data.aws_availability_zones.available.names[count.index]
cidr_block        = "10.0.2${count.index}.0/24"
vpc_id            = aws_vpc.eks.id
map_public_ip_on_launch = true
tags = map(
"Name", "eks_application",
"kubernetes.io/cluster/${var.cluster_name}", "shared"
)
}
resource "aws_subnet" "vpn" {
count = var.subnet_count
availability_zone = data.aws_availability_zones.available.names[count.index]
cidr_block        = "10.0.3${count.index}.0/24"
vpc_id            = aws_vpc.eks.id
tags = {
Name = "eks_vpn"
}  
}
resource "aws_vpc" "eks" {
cidr_block = "10.0.0.0/16"
enable_dns_hostnames = true
enable_dns_support = true
tags = map(
"Name", "eks-vpc",
"kubernetes.io/cluster/${var.cluster_name}", "shared"
)
}

I had been trying to create the Kubernetes resources in one single, large Terraform manifest. I needed to split the Kubernetes deployment out into a separate Terraform manifest, which I applied after updating the ~/.kube/config file.

The DNS errors were caused by that kubeconfig file not being up to date for the newly created cluster.
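
For reference, here is a minimal sketch (not my exact code) of how the separate Kubernetes manifest can configure its provider directly from AWS data sources, so it always resolves the live cluster endpoint instead of whatever is cached in a local kubeconfig. The data source names and the cluster_name variable are placeholders:

# Second, separate Terraform configuration: look up the already-created cluster.
data "aws_eks_cluster" "this" {
  name = var.cluster_name
}

data "aws_eks_cluster_auth" "this" {
  name = var.cluster_name
}

# Read the endpoint, CA and token from the data sources above instead of
# relying on a possibly stale ~/.kube/config entry.
provider "kubernetes" {
  host                   = data.aws_eks_cluster.this.endpoint
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.this.certificate_authority.0.data)
  token                  = data.aws_eks_cluster_auth.this.token
  load_config_file       = false
  version                = "~> 1.9"
}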

In addition, I needed to make sure that endpoint_private_access = true was set in the EKS cluster resource.
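
For illustration, the relevant block of the cluster resource looks like this (trimmed to the vpc_config; whether endpoint_public_access should also be enabled depends on where Terraform runs from, so it is left as in my original code):

resource "aws_eks_cluster" "tf_eks_cluster" {
  name     = var.cluster_name
  role_arn = aws_iam_role.tf-eks-cluster.arn

  vpc_config {
    security_group_ids      = [aws_security_group.tf-eks-cluster.id]
    subnet_ids              = var.app_subnet_ids
    # Private access lets the worker nodes and anything else inside the VPC
    # resolve and reach the API server endpoint.
    endpoint_private_access = true
    endpoint_public_access  = false
  }
}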

Latest update