启动第二个节点后出现Cassandra断言错误



我有一个Cassandra集群,它有两个节点,具有简单的复制策略。

一切都很好,直到其中一个节点崩溃。我通过克隆剩余的节点虚拟机恢复了崩溃的节点(因此我们克隆了一个文件系统(,并更新了侦听和RPC地址。

现在我不断地得到以下奇怪的错误。

当我运行每个节点时,一切都很好。但当我启动第二个节点时,第一个节点会返回错误!

ERROR [Native-Transport-Requests-1] 2020-07-21 08:19:31,042 Message.java:693 - Unexpected exception during request; channel = [id: 0xc1935e7a, L:/192.168.40.15:9042 - R:/192.168.40.15:47980]
java.lang.AssertionError: null
at org.apache.cassandra.locator.TokenMetadata.firstTokenIndex(TokenMetadata.java:1065) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.locator.TokenMetadata.firstToken(TokenMetadata.java:1079) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.locator.AbstractReplicationStrategy.getNaturalEndpoints(AbstractReplicationStrategy.java:107) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageService.getLiveNaturalEndpoints(StorageService.java:3866) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageService.getLiveNaturalEndpoints(StorageService.java:3852) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageProxy.getLiveSortedEndpoints(StorageProxy.java:1914) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageProxy$RangeIterator.computeNext(StorageProxy.java:1992) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageProxy$RangeIterator.computeNext(StorageProxy.java:1962) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.utils.AbstractIterator.hasNext(AbstractIterator.java:47) ~[apache-cassandra-3.11.4.jar:3.11.4]
at com.google.common.collect.Iterators$PeekingImpl.hasNext(Iterators.java:1149) ~[guava-18.0.jar:na]
at org.apache.cassandra.service.StorageProxy$RangeMerger.computeNext(StorageProxy.java:2014) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageProxy$RangeMerger.computeNext(StorageProxy.java:1999) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.utils.AbstractIterator.hasNext(AbstractIterator.java:47) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageProxy$RangeCommandIterator.computeNext(StorageProxy.java:2132) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.service.StorageProxy$RangeCommandIterator.computeNext(StorageProxy.java:2092) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.utils.AbstractIterator.hasNext(AbstractIterator.java:47) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.db.transform.BasePartitions.hasNext(BasePartitions.java:92) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.statements.SelectStatement.process(SelectStatement.java:786) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.statements.SelectStatement.processResults(SelectStatement.java:438) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.statements.SelectStatement.execute(SelectStatement.java:416) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.statements.SelectStatement.execute(SelectStatement.java:289) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.statements.SelectStatement.execute(SelectStatement.java:117) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.QueryProcessor.processStatement(QueryProcessor.java:225) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.QueryProcessor.process(QueryProcessor.java:256) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.cql3.QueryProcessor.process(QueryProcessor.java:241) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.transport.messages.QueryMessage.execute(QueryMessage.java:116) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.transport.Message$Dispatcher.channelRead0(Message.java:566) [apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.transport.Message$Dispatcher.channelRead0(Message.java:410) [apache-cassandra-3.11.4.jar:3.11.4]
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105) [netty-all-4.0.44.Final.jar:4.0.44.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357) [netty-all-4.0.44.Final.jar:4.0.44.Final]
at io.netty.channel.AbstractChannelHandlerContext.access$600(AbstractChannelHandlerContext.java:35) [netty-all-4.0.44.Final.jar:4.0.44.Final]
at io.netty.channel.AbstractChannelHandlerContext$7.run(AbstractChannelHandlerContext.java:348) [netty-all-4.0.44.Final.jar:4.0.44.Final]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_252]
at org.apache.cassandra.concurrent.AbstractLocalAwareExecutorService$FutureTask.run(AbstractLocalAwareExecutorService.java:162) [apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.concurrent.SEPWorker.run(SEPWorker.java:114) [apache-cassandra-3.11.4.jar:3.11.4]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_252]

我正在使用以下Cassandra版本:[cklsh 5.0.1 | Cassandra 3.11.4 | CQL规范3.4.4 |本机协议v4]

以下是配置文件:

casandra.yaml

cluster_name: 'babelfish'
num_tokens: 256

hinted_handoff_enabled: true

hinted_handoff_throttle_in_kb: 1024
max_hints_delivery_threads: 2

hints_flush_period_in_ms: 10000
max_hints_file_size_in_mb: 128

batchlog_replay_throttle_in_kb: 1024
authenticator: AllowAllAuthenticator
authorizer: AllowAllAuthorizer
role_manager: CassandraRoleManager
roles_validity_in_ms: 2000

permissions_validity_in_ms: 2000

credentials_validity_in_ms: 2000

partitioner: org.apache.cassandra.dht.Murmur3Partitioner
data_file_directories:
- /var/lib/cassandra/data
commitlog_directory: /var/lib/cassandra/commitlog
cdc_enabled: false

disk_failure_policy: stop
commit_failure_policy: stop
prepared_statements_cache_size_mb:
thrift_prepared_statements_cache_size_mb:
key_cache_size_in_mb:
key_cache_save_period: 14400

row_cache_size_in_mb: 0
row_cache_save_period: 0

counter_cache_size_in_mb:
counter_cache_save_period: 7200

saved_caches_directory: /var/lib/cassandra/saved_caches
commitlog_sync: periodic
commitlog_sync_period_in_ms: 10000
commitlog_segment_size_in_mb: 32

seed_provider:
- class_name: org.apache.cassandra.locator.SimpleSeedProvider
parameters:
- seeds: "192.168.30.15, 192.168.40.15"
concurrent_reads: 32
concurrent_writes: 32
concurrent_counter_writes: 32
concurrent_materialized_view_writes: 32



memtable_allocation_type: heap_buffers


index_summary_capacity_in_mb:
index_summary_resize_interval_in_minutes: 60
trickle_fsync: false
trickle_fsync_interval_in_kb: 10240
storage_port: 7000
ssl_storage_port: 7001
listen_address: 192.168.40.15



start_native_transport: true
native_transport_port: 9042

start_rpc: false
rpc_address: 192.168.40.15

rpc_port: 9160

rpc_keepalive: true
rpc_server_type: sync


thrift_framed_transport_size_in_mb: 15
incremental_backups: false
snapshot_before_compaction: false
auto_snapshot: true
column_index_size_in_kb: 64
column_index_cache_size_in_kb: 2

compaction_throughput_mb_per_sec: 16
sstable_preemptive_open_interval_in_mb: 50

read_request_timeout_in_ms: 5000
range_request_timeout_in_ms: 10000
write_request_timeout_in_ms: 2000
counter_write_request_timeout_in_ms: 5000
cas_contention_timeout_in_ms: 1000
truncate_request_timeout_in_ms: 60000
request_timeout_in_ms: 10000
slow_query_log_timeout_in_ms: 500
cross_node_timeout: false

endpoint_snitch: GossipingPropertyFileSnitch
dynamic_snitch_update_interval_in_ms: 100
dynamic_snitch_reset_interval_in_ms: 600000
dynamic_snitch_badness_threshold: 0.1
request_scheduler: org.apache.cassandra.scheduler.NoScheduler

server_encryption_options:
internode_encryption: none
keystore: conf/.keystore
keystore_password: cassandra
truststore: conf/.truststore
truststore_password: cassandra
client_encryption_options:
enabled: false
optional: false
keystore: conf/.keystore
keystore_password: cassandra
internode_compression: dc
inter_dc_tcp_nodelay: false
tracetype_query_ttl: 86400
tracetype_repair_ttl: 604800

enable_user_defined_functions: false
enable_scripted_user_defined_functions: false
enable_materialized_views: true
windows_timer_interval: 1

transparent_data_encryption_options:
enabled: false
chunk_length_kb: 64
cipher: AES/CBC/PKCS5Padding
key_alias: testing:1
key_provider:
- class_name: org.apache.cassandra.security.JKSKeyProvider
parameters:
- keystore: conf/.keystore
keystore_password: cassandra
store_type: JCEKS
key_password: cassandra

tombstone_warn_threshold: 1000
tombstone_failure_threshold: 100000
batch_size_warn_threshold_in_kb: 5
batch_size_fail_threshold_in_kb: 50
unlogged_batch_across_partitions_warn_threshold: 10
compaction_large_partition_warning_threshold_mb: 100
gc_warn_threshold_in_ms: 1000

back_pressure_enabled: false
back_pressure_strategy:
- class_name: org.apache.cassandra.net.RateBasedBackPressure
parameters:
- high_ratio: 0.90
factor: 5
flow: FAST

cassandra-rackdc.properties

# These properties are used with GossipingPropertyFileSnitch and will
# indicate the rack and dc for this node
dc=DC1
rack=RACK1
# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
# to append a string to the EC2 region name.
#dc_suffix=
# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
# prefer_local=true

cassandra拓扑属性

# Cassandra Node IP=Data Center:Rack
192.168.30.15=DC1:RACK1
192.168.40.15=DC1:RACK1
# default for unknown nodes
default=DC1:r1
# Native IPv6 is supported, however you must escape the colon in the IPv6 Address
# Also be sure to comment out JVM_OPTS="$JVM_OPTS -Djava.net.preferIPv4Stack=true"
# in cassandra-env.sh
# fe80:0:0:0:202:b3ff:fe1e:8329=DC1:RAC3

这个错误的来源是什么?如何修复?

如果您克隆了包含所有数据的虚拟机,那么您就拥有了第一个节点的所有数据,包括节点的ID。要解决这个问题,请关闭第二个节点,从data_file_directories和提交日志中删除所有数据,只保留第一个节点作为种子节点,然后启动第二个,这样它就会正常加入集群,在这个过程完成后,更新种子列表(如果保留种子列表中的第二个节点,它不会加入集群,而是引导一个新集群(。

最新更新