I am trying to run Flink locally with 8 task slots for a job with parallelism 8. I am using FlinkKafkaProducer011 as the sink. The job runs fine for about 20 minutes, and checkpoints succeed during that time. After that, however, the job fails with the following error:
org.apache.flink.streaming.connectors.kafka.FlinkKafka011Exception: Failed to send data to Kafka: The server disconnected before a response was received.
I tried increasing request.timeout.ms to 15 minutes, but still no luck.
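For reference, the sink is wired up roughly like this (a minimal sketch, not my exact job; the source, the topic name, and the checkpoint interval are placeholders):

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011

object ProducerJob {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(8)          // 8 task slots, parallelism 8
    env.enableCheckpointing(60000) // checkpoints succeed for ~20 minutes

    val props = new Properties()
    props.setProperty("bootstrap.servers", "localhost:9092")
    // already raised to 15 minutes, still no luck:
    props.setProperty("request.timeout.ms", "900000")

    env
      .socketTextStream("localhost", 9999) // placeholder source
      .addSink(new FlinkKafkaProducer011[String](
        "output-topic", new SimpleStringSchema(), props)) // placeholder topic

    env.execute("kafka-producer-job")
  }
}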
Try this example; it works for me: https://github.com/kali786516/FlinkStreamAndSql/blob/master/src/main/scala/com/aws/examples/kafka/consumer/KafkaConsumer.scala
package com.aws.examples.kafka.consumer

import java.util.{Properties, UUID}

import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.api.{TableEnvironment, Types}
import org.apache.flink.table.descriptors.{Json, Kafka, Rowtime, Schema}
import org.apache.flink.types.Row

object KafkaConsumer {

  def main(args: Array[String]): Unit = {
    val params: ParameterTool = ParameterTool.fromArgs(args)

    val kparams: Properties = params.getProperties
    kparams.setProperty("auto.offset.reset", "earliest")
    kparams.setProperty("flink.starting-position", "earliest")
    kparams.setProperty("group.id", UUID.randomUUID.toString)
    kparams.setProperty("kafka.bootstrap.servers", "localhost:9092")

    // set up the streaming environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000))
    env.enableCheckpointing(300000) // 300 seconds
    env.getConfig.setGlobalJobParameters(params)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    val tableEnv = TableEnvironment.getTableEnvironment(env)

    // register the Kafka 0.11 topic as a streaming table source
    tableEnv.connect(
        new Kafka()
          .version("0.11")
          .topic("creditcardTransaction")
          .property("bootstrap.servers", "localhost:9092"))
      .withSchema(new Schema()
        .field("cc_num", Types.STRING)
        .field("first_column", Types.STRING)
        .field("last_column", Types.STRING)
        .field("trans_num", Types.STRING)
        .field("trans_time", Types.STRING)
        .field("category_column", Types.STRING)
        .field("merchant_column", Types.STRING)
        .field("amt_column", Types.STRING)
        .field("merch_lat", Types.STRING)
        .field("merch_long", Types.STRING)
        .field("ts", Types.SQL_TIMESTAMP)
        .rowtime(new Rowtime().timestampsFromSource.watermarksPeriodicBounded(1000)))
      .withFormat(new Json().deriveSchema)
      .inAppendMode
      .registerTableSource("creditcardTransactiontable")

    val query =
      "SELECT DISTINCT cc_num, first_column, last_column, trans_num, trans_time, " +
        "category_column, merchant_column, amt_column, merch_lat, merch_long " +
        "FROM creditcardTransactiontable WHERE cc_num NOT IN ('cc_num')"
    val table = tableEnv.sqlQuery(query)
    table.printSchema()
    tableEnv.toRetractStream(table, classOf[Row]).print

    // register a second Kafka 0.11 topic as the sink and write the query result to it
    tableEnv.connect(
        new Kafka()
          .version("0.11")
          .topic("creditcardTransactionTargetTopic")
          .property("bootstrap.servers", "localhost:9092")
          .sinkPartitionerRoundRobin)
      .withSchema(new Schema()
        .field("cc_num", Types.STRING)
        .field("first_column", Types.STRING)
        .field("last_column", Types.STRING)
        .field("trans_num", Types.STRING)
        .field("trans_time", Types.STRING)
        .field("category_column", Types.STRING)
        .field("merchant_column", Types.STRING)
        .field("amt_column", Types.STRING)
        .field("merch_lat", Types.STRING)
        .field("merch_long", Types.STRING))
      .withFormat(new Json().deriveSchema)
      .inAppendMode
      .registerTableSink("sinkTopic")

    val sql =
      "INSERT INTO sinkTopic SELECT DISTINCT cc_num, first_column, last_column, trans_num, " +
        "trans_time, category_column, merchant_column, amt_column, merch_lat, merch_long " +
        "FROM creditcardTransactiontable WHERE cc_num NOT IN ('cc_num')"
    tableEnv.sqlUpdate(sql)

    env.execute()
  }
}
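To smoke-test the source topic, you can push one record with the plain Kafka producer API. This is a sketch under a couple of assumptions: the TestDataProducer object and all field values are made up, and the exact string format flink-json accepts for the ts timestamp may need adjusting:

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object TestDataProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("bootstrap.servers", "localhost:9092")
    props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // one sample record matching the schema registered above (all values invented)
    val json = """{"cc_num":"4111111111111111","first_column":"John","last_column":"Doe","trans_num":"t-0001","trans_time":"12:00:00","category_column":"grocery","merchant_column":"acme","amt_column":"42.00","merch_lat":"40.7","merch_long":"-74.0","ts":"2019-07-09T12:00:00Z"}"""
    producer.send(new ProducerRecord[String, String]("creditcardTransaction", json))
    producer.close()
  }
}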
pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>FlinkStreamAndSql</groupId>
<artifactId>FlinkStreamAndSql</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<!-- see http://davidb.github.com/scala-maven-plugin -->
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.1.3</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.13</version>
<configuration>
<useFile>false</useFile>
<disableXmlReport>true</disableXmlReport>
<!-- If you have a classpath issue like NoClassDefFoundError,... -->
<!-- useManifestOnlyJar>false</useManifestOnlyJar -->
<includes>
<include>**/*Test.*</include>
<include>**/*Suite.*</include>
</includes>
</configuration>
</plugin>
<!-- "package" command plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4.1</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.derby</groupId>
<artifactId>derby</artifactId>
<version>10.13.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-jdbc_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kinesis_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.12</version>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>amazon-kinesis-client</artifactId>
<version>1.8.8</version>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-kinesis</artifactId>
<version>1.11.579</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.1</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.7</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>dynamodb-streams-kinesis-adapter</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
<version>1.11.579</version>
</dependency>
</dependencies>
</project>
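Build it with mvn package; the assembly plugin produces FlinkStreamAndSql-1.0-SNAPSHOT-jar-with-dependencies.jar, which you can submit with flink run, or you can simply run the main class from your IDE, since StreamExecutionEnvironment.getExecutionEnvironment falls back to a local environment there.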