Apache Ignite 中间歇性出现长时间运行的查询(LRQ)



我们在 Ignite 中间歇性地遇到长时间运行的查询(LRQ):同一个查询在一天中的大部分时间都运行正常,但偶尔会变成 LRQ,导致响应时间大幅增加,并影响整个平台。

我们使用的是 Ignite 2.7.5,采用分区(PARTITIONED)缓存,每个节点 256 个分区;集群共有两个节点,以 UDP 多播模式相互通信。

内存(RAM)为 210 GB,已启用持久化(persistence),其余配置如下:

<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd">
<!-- Spring bean definitions for one Apache Ignite node: a failure handler bean and the IgniteConfiguration ("ignite.cfg") that references it. -->

<!-- Failure handler bean; wired into ignite.cfg below through the failureHandler property. -->
<bean name="noOpFailureHandler" class="org.apache.ignite.failure.NoOpFailureHandler"/>
<!-- Node configuration: instance name "PincodeGrid", started with clientMode=false. -->
<bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
<property name="peerClassLoadingEnabled" value="true"/>
<property name="igniteInstanceName" value="PincodeGrid"/>
<property name="clientMode" value="false"/>
<!-- Timeout settings. NOTE(review): values are presumably milliseconds (80 s / 120 s / 30 s / 3 s) - confirm against the Ignite documentation. -->
<property name="failureDetectionTimeout" value="80000"/>
<property name="clientFailureDetectionTimeout" value="120000"/>
<property name="systemWorkerBlockedTimeout" value="30000" />
<property name="longQueryWarningTimeout" value="3000"/>
<property name="failureHandler" ref="noOpFailureHandler"/>
<!-- Evaluates to 6,000,000. NOTE(review): if the unit is milliseconds this is 100 minutes; confirm that 10 minutes (60 * 10 * 1000) was not intended. -->
<property name="metricsLogFrequency" value="#{600 * 10 * 1000}"/>
<property name="rebalanceThreadPoolSize" value="16"/>
<property name="dataStorageConfiguration">
<bean class="org.apache.ignite.configuration.DataStorageConfiguration">
<!-- Page size: 4 * 1024 = 4096 bytes. -->
<property name="pageSize" value="#{4 * 1024}"/>
<!--<property name="writeThrottlingEnabled" value="true"/>-->
<!-- Redefining the default data region's settings; persistence is enabled for this region. -->
<property name="defaultDataRegionConfiguration">
<bean class="org.apache.ignite.configuration.DataRegionConfiguration">
<property name="persistenceEnabled" value="true"/>
<!-- Initial size of the region: 105 * 1024^3 bytes (105 GiB). -->
<property name="initialSize" value="#{105L * 1024 * 1024 * 1024}"/>
<property name="name" value="Default_Region"/>
<!-- Maximum size of the region: 120 * 1024^3 bytes (120 GiB). -->
<property name="maxSize" value="#{120L * 1024 * 1024 * 1024}"/>
<!-- Checkpoint page buffer: 4096 * 1024^2 bytes (4 GiB). -->
<property name="checkpointPageBufferSize"
value="#{4096L * 1024 * 1024}"/>
<!--<property name="pageEvictionMode" value="RANDOM_2_LRU"/>-->
</bean>
</property>
<!-- WAL, WAL archive and data storage locations; the archive directory sits inside the WAL directory. -->
<property name="walPath" value="/wal/pincode"/>
<property name="walArchivePath" value="/wal/pincode/archive"/>
<property name="storagePath" value="/ignite/persistence"/>
<!-- Checkpointing: frequency 180000 (presumably milliseconds, i.e. 3 minutes - confirm) with 8 checkpoint threads. -->
<property name="checkpointFrequency" value="180000"/>
<property name="checkpointThreads" value="8"/>
<property name="walMode" value="BACKGROUND"/>
<!-- WAL segment size: 1 * 1024^3 bytes (1 GiB). -->
<property name="walSegmentSize" value="#{1L * 1024 * 1024 * 1024}"/>
<!--<property name="authenticationEnabled" value="true"/>-->
</bean>
</property>
<!-- Discovery: TcpDiscoverySpi using a multicast IP finder on group 224.0.0.180, port 47514. -->
<property name="discoverySpi">
<bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
<property name="ipFinder">
<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder">
<property name="multicastGroup" value="224.0.0.180"/>
<property name="multicastPort" value="47514"/>
</bean>
</property>
</bean>
</property>
<!-- Communication: TcpCommunicationSpi tuning (queue limit, write timeout, connections per node, paired connections). -->
<property name="communicationSpi">
<bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
<property name="messageQueueLimit" value="2048"/>
<property name="socketWriteTimeout" value="10000"/>
<property name="connectionsPerNode" value="10"/>
<property name="usePairedConnections" value="true"/>
<!-- Socket receive buffer: 64 * 1024 = 65536 bytes (64 KiB). -->
<property name="socketReceiveBuffer" value="#{64L * 1024}"/>
</bean>
</property>
</bean>

</beans> 

下面是变成 LRQ 的查询及其执行计划:

SPS__Z1.PRIORITY __C0_0,
LOGISTIC__Z2.CODFLAG __C0_1,
LOGISTIC__Z2.CODLIMIT __C0_2,
LOGISTIC__Z2.PREPAIDLIMIT __C0_3,
LOGISTIC__Z2.SLAVEID __C0_4,
STOCKROOM__Z0.ORDERCUTOFFTIMESDD __C0_5,
STOCKROOM__Z0.ORDERCUTOFFTIMEED __C0_6,
STOCKROOM__Z0.ORDERCUTOFFTIMEHD __C0_7,
LOGISTIC__Z2.LOGISTICSID __C0_8,
LOGISTIC__Z2.LOGISTICPRIORITY __C0_9
FROM "SellerPincodeServiceabilityCache".SELLERPINCODESERVICEABILITY SPS__Z1 
INNER JOIN "LogisticsServiceabilityCache".LOGISTICSSERVICEABILITY LOGISTIC__Z2 
ON TRUE 
INNER JOIN "StockRoomLocationCache".STOCKROOMLOCATIONS STOCKROOM__Z0 
ON TRUE
WHERE (LOGISTIC__Z2.PREPAIDLIMIT >= ?3) AND ((LOGISTIC__Z2.ISLOGISTICACTIVE = 'Y') AND ((LOGISTIC__Z2.ISFRAGILE = ?8) AND ((LOGISTIC__Z2.ISPRECIOUS = ?7) AND ((LOGISTIC__Z2.DELIVERYMODE = ?6) AND ((LOGISTIC__Z2.TRANSPORTMODE = ?5) AND ((LOGISTIC__Z2.DESTNPINCODE = ?1) AND ((STOCKROOM__Z0.ACTIVE = 'Y') AND ((STOCKROOM__Z0.SELLERID = ?2) AND ((STOCKROOM__Z0.SLAVEID = LOGISTIC__Z2.SLAVEID) AND ((SPS__Z1.SLAVEID = LOGISTIC__Z2.SLAVEID) AND ((SPS__Z1.SHIPMENTTYPE = ?4) AND ((SPS__Z1.DELIVERYTYPE = ?6) AND ((SPS__Z1.PINCODE = ?1) AND (SPS__Z1.SELLERID = ?2))))))))))))))
ORDER BY 1, 10', plan=
SELECT
SPS__Z1.PRIORITY AS __C0_0,
LOGISTIC__Z2.CODFLAG AS __C0_1,
LOGISTIC__Z2.CODLIMIT AS __C0_2,
LOGISTIC__Z2.PREPAIDLIMIT AS __C0_3,
LOGISTIC__Z2.SLAVEID AS __C0_4,
STOCKROOM__Z0.ORDERCUTOFFTIMESDD AS __C0_5,
STOCKROOM__Z0.ORDERCUTOFFTIMEED AS __C0_6,
STOCKROOM__Z0.ORDERCUTOFFTIMEHD AS __C0_7,
LOGISTIC__Z2.LOGISTICSID AS __C0_8,
LOGISTIC__Z2.LOGISTICPRIORITY AS __C0_9
FROM "SellerPincodeServiceabilityCache".SELLERPINCODESERVICEABILITY SPS__Z1
/* "SellerPincodeServiceabilityCache".SELLER_PINCODE_SERVICE_INDX: SHIPMENTTYPE = ?4
AND DELIVERYTYPE = ?6
AND PINCODE = ?1
AND SELLERID = ?2
*/
/* WHERE (SPS__Z1.SELLERID = ?2)
AND ((SPS__Z1.PINCODE = ?1)
AND ((SPS__Z1.SHIPMENTTYPE = ?4)
AND (SPS__Z1.DELIVERYTYPE = ?6)))
*/
INNER JOIN "LogisticsServiceabilityCache".LOGISTICSSERVICEABILITY LOGISTIC__Z2
/* "LogisticsServiceabilityCache".LOGISTICS_SERVICEABILITY_INDX: PREPAIDLIMIT >= ?3
AND ISLOGISTICACTIVE = 'Y'
AND ISFRAGILE = ?8
AND ISPRECIOUS = ?7
AND DELIVERYMODE = ?6
AND TRANSPORTMODE = ?5
AND DESTNPINCODE = ?1
AND SLAVEID = SPS__Z1.SLAVEID
*/
ON 1=1
/* WHERE (SPS__Z1.SLAVEID = LOGISTIC__Z2.SLAVEID)
AND ((LOGISTIC__Z2.DESTNPINCODE = ?1)
AND ((LOGISTIC__Z2.TRANSPORTMODE = ?5)
AND ((LOGISTIC__Z2.DELIVERYMODE = ?6)
AND ((LOGISTIC__Z2.ISPRECIOUS = ?7)
AND ((LOGISTIC__Z2.ISFRAGILE = ?8)
AND ((LOGISTIC__Z2.PREPAIDLIMIT >= ?3)
AND (LOGISTIC__Z2.ISLOGISTICACTIVE = 'Y')))))))
*/
INNER JOIN "StockRoomLocationCache".STOCKROOMLOCATIONS STOCKROOM__Z0
/* "StockRoomLocationCache".STOCKROOMLOCATIONS_SLAVEID_IDX: SLAVEID = LOGISTIC__Z2.SLAVEID */
ON 1=1
WHERE (LOGISTIC__Z2.PREPAIDLIMIT >= ?3)
AND ((LOGISTIC__Z2.ISLOGISTICACTIVE = 'Y')
AND ((LOGISTIC__Z2.ISFRAGILE = ?8)
AND ((LOGISTIC__Z2.ISPRECIOUS = ?7)
AND ((LOGISTIC__Z2.DELIVERYMODE = ?6)
AND ((LOGISTIC__Z2.TRANSPORTMODE = ?5)
AND ((LOGISTIC__Z2.DESTNPINCODE = ?1)
AND ((STOCKROOM__Z0.ACTIVE = 'Y')
AND ((STOCKROOM__Z0.SELLERID = ?2)
AND ((STOCKROOM__Z0.SLAVEID = LOGISTIC__Z2.SLAVEID)
AND ((SPS__Z1.SLAVEID = LOGISTIC__Z2.SLAVEID)
AND ((SPS__Z1.SHIPMENTTYPE = ?4)
AND ((SPS__Z1.DELIVERYTYPE = ?6)
AND ((SPS__Z1.PINCODE = ?1)
AND (SPS__Z1.SELLERID = ?2))))))))))))))
ORDER BY 1, 10
, parameters=[533233, 125112, 2480.0, TSHIP, SUR, HD, N, N]] ```

也许这个查询本身就很慢?你检查过它通常返回多少行、耗时多久吗?也可能存在某些异常的 sellerId,针对它们的查询会运行得更久?

最新更新