背景
随着服务端并发压力日益增大,今天遇到了大批请求超时问题
频繁的切换系统某些指定功能页面,不断F5刷新问题出现,控制台出现了如下错误
Caused by: org.hibernate.exception.GenericJDBCException: Unable to acquire JDBC Connection
at org.hibernate.exception.internal.StandardSQLExceptionConverter.convert(StandardSQLExceptionConverter.java:47) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.engine.jdbc.spi.SqlExceptionHelper.convert(SqlExceptionHelper.java:113) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.engine.jdbc.spi.SqlExceptionHelper.convert(SqlExceptionHelper.java:99) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.acquireConnectionIfNeeded(LogicalConnectionManagedImpl.java:111) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.getPhysicalConnection(LogicalConnectionManagedImpl.java:138) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.getConnectionForTransactionManagement(LogicalConnectionManagedImpl.java:273) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.begin(LogicalConnectionManagedImpl.java:281) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.transaction.backend.jdbc.internal.JdbcResourceLocalTransactionCoordinatorImpl$TransactionDriverControlImpl.begin(JdbcResourceLocalTransactionCoordinatorImpl.java:246) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.engine.transaction.internal.TransactionImpl.begin(TransactionImpl.java:83) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.springframework.orm.jpa.vendor.HibernateJpaDialect.beginTransaction(HibernateJpaDialect.java:184) ~[spring-orm-5.2.9.RELEASE.jar:5.2.9.RELEASE]
at org.springframework.orm.jpa.JpaTransactionManager.doBegin(JpaTransactionManager.java:402) ~[spring-orm-5.2.9.RELEASE.jar:5.2.9.RELEASE]
... 82 more
Caused by: com.alibaba.druid.pool.GetConnectionTimeoutException: wait millis 10013, active 20, maxActive 20, creating 0
at com.alibaba.druid.pool.DruidDataSource.getConnectionInternal(DruidDataSource.java:1738) ~[druid-1.2.1.jar:1.2.1]
at com.alibaba.druid.pool.DruidDataSource.getConnectionDirect(DruidDataSource.java:1408) ~[druid-1.2.1.jar:1.2.1]
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:1388) ~[druid-1.2.1.jar:1.2.1]
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:1378) ~[druid-1.2.1.jar:1.2.1]
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:99) ~[druid-1.2.1.jar:1.2.1]
at io.seata.rm.datasource.DataSourceProxy.getConnection(DataSourceProxy.java:132) ~[seata-all-1.3.0.jar:1.3.0]
at io.seata.rm.datasource.DataSourceProxy.getConnection(DataSourceProxy.java:42) ~[seata-all-1.3.0.jar:1.3.0]
at org.hibernate.engine.jdbc.connections.internal.DatasourceConnectionProviderImpl.getConnection(DatasourceConnectionProviderImpl.java:122) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.internal.NonContextualJdbcConnectionAccess.obtainConnection(NonContextualJdbcConnectionAccess.java:38) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.acquireConnectionIfNeeded(LogicalConnectionManagedImpl.java:108) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.getPhysicalConnection(LogicalConnectionManagedImpl.java:138) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.getConnectionForTransactionManagement(LogicalConnectionManagedImpl.java:273) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.begin(LogicalConnectionManagedImpl.java:281) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.resource.transaction.backend.jdbc.internal.JdbcResourceLocalTransactionCoordinatorImpl$TransactionDriverControlImpl.begin(JdbcResourceLocalTransactionCoordinatorImpl.java:246) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.hibernate.engine.transaction.internal.TransactionImpl.begin(TransactionImpl.java:83) ~[hibernate-core-5.4.21.Final.jar:5.4.21.Final]
at org.springframework.orm.jpa.vendor.HibernateJpaDialect.beginTransaction(HibernateJpaDialect.java:184) ~[spring-orm-5.2.9.RELEASE.jar:5.2.9.RELEASE]
at org.springframework.orm.jpa.JpaTransactionManager.doBegin(JpaTransactionManager.java:402) ~[spring-orm-5.2.9.RELEASE.jar:5.2.9.RELEASE]
... 82 more
2022-07-17 09:12:47.912 [http-nio-8921-exec-11] INFO com.cysoft.service.aivisia.controller.AiVisiaDetectController - ######visia callback data={"image":"http://data-public.vsais.com/4a3af313-20cf-4aa0-a7fd-a6c60fc6a563/20220615012246944386050.jpg","detect_type":1,"erp_sn":"622255","device_sn":"J41B2F57B8F2B268BCF","camera_id":"20cedab6d4bc472cb4bff3b394e994b4","objects_count":2,"camera_sn":"7630bea3-84a1-46cd-8f94-166f29cd7b36","thumb_list":["http://data-public.vsais.com/4a3af313-20cf-4aa0-a7fd-a6c60fc6a563/20220615012247536249021.jpg","http://data-public.vsais.com/4a3af313-20cf-4aa0-a7fd-a6c60fc6a563/20220615012247688449845.jpg"],"image_result":"http://data-public.vsais.com/4a3af313-20cf-4aa0-a7fd-a6c60fc6a563/20220615012247177705520.jpg","datetime":"2022-06-15 09:22:07","video_url":"","project_sn":"8888888","camera_name":"安全帽_VSD-HiK-001","camera_info":{"camera_ip":"172.16.1.147","camera_port":"8000","camera_sn":"E85770663"}}
2022-07-17 09:12:47.917 [http-nio-8921-exec-11] INFO io.seata.tm.api.DefaultGlobalTransaction - Begin new global transaction [172.16.2.112:8091:280267452659990528]
2022-07-17 09:12:50.090 [http-nio-8921-exec-6] WARN org.hibernate.engine.jdbc.spi.SqlExceptionHelper - SQL Error: 0, SQLState: null
2022-07-17 09:12:50.090 [http-nio-8921-exec-6] ERROR org.hibernate.engine.jdbc.spi.SqlExceptionHelper - wait millis 10008, active 20, maxActive 20, creating 0可以看到关键信息 GetConnectionTimeoutException: wait millis 10013, active 20, maxActive 20, creating 0 ;很显然超时原因是由于请求线程处理业务逻辑时获取数据库连接池超时导致,通过 jstack 命令输出线程堆栈信息验证,看到了当前系统的线程大量处于 Wating 状态,并且是由于 DruidDataSource.pollLast 使线程处于等待状态。
"http-nio-8921-exec-53" #501 daemon prio=5 os_prio=0 tid=0x000000004630f000 nid=0x55c4 waiting on condition [0x00000000518fa000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000005c50cc4e8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
at com.alibaba.druid.pool.DruidDataSource.pollLast(DruidDataSource.java:2229)
at com.alibaba.druid.pool.DruidDataSource.getConnectionInternal(DruidDataSource.java:1669)我们配置数据库连接配置如下
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url:xxx
username: xxx
password: xxx
maxWait: 10000
initialSize: 8
maxActive: 20
minIdle: 8分析连接配置,初步怀疑是由于部分 慢sql 查询导致连接占用时长,并且慢 sql 占用连接时间太长,导致数据库连接池打满,并且超过了我们的 feign 请求超时时间(默认我们是10秒),导致请求超时导致,所以在系统里添加了 druid 监控配置
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url: xxx
username: xxx
password: xxx
maxWait: -1
initialSize: 8
maxActive: 20
minIdle: 8
druid:
#配置druid监控页
aop-patterns: com.cysoft.* #监控springBean
filters: stat,wall #底层开启功能,stat(sql监控),wall(防火墙) 注意是druid下的filters
stat-view-servlet: #配置监控页功能
enabled: true #默认开启,这里显示说明
login-username: admin #登录名
login-password: admin #登录密码
reset-enable: false #禁用重置按钮
web-stat-filter: #监控web
enabled: true
url-pattern: /* #监控所有
exclusions: '*.js,*.gif,*.jpg,*.png,*.css,*.ico,/druid/*' #放行
filter:
stat: #对上面filters里的stat的详细配置
slow-sql-millis: 500 #慢sql时间是毫秒单位的 执行时间1秒以上的为慢SQL
log-slow-sql: true #日志记录
enabled: true
wall:
enabled: true
config:
drop-table-allow: false #禁用删除表的SQL重启项目,再次通过本地复现步骤复现。登录 druid控制台 ,

看到最慢的sql耗时才耗费200多毫秒且最慢的请求耗时不到2秒,而且观察执行中的请求数量一直没有减少,并且数据源的逻辑连接打开次数和连接关闭次数数量不一致 猜测是由于数据库 连接内存泄漏 导致,即有线程不释放连接导致。继续在数据源配置中添加配置,再次通过 本地复现步骤 复现问题。
#是否自动回收超时连接
removeAbandoned: true
#超时时间(以秒数为单位)
removeAbandonedTimeout: 60
#是否在自动回收超时连接的时候打印连接的超时错误
logAbandoned: true等待1分钟后,看到了 abandon connection 错误信息如下
2022-07-17 09:48:41.541 [Druid-ConnectionPool-Destroy-767266208] ERROR com.alibaba.druid.pool.DruidDataSource - abandon connection, owner thread: http-nio-8921-exec-4, connected at : 1655258197759, open stackTrace
at java.lang.Thread.getStackTrace(Thread.java:1559)
at com.alibaba.druid.pool.DruidDataSource.getConnectionDirect(DruidDataSource.java:1477)
at com.alibaba.druid.filter.FilterChainImpl.dataSource_connect(FilterChainImpl.java:5059)
at com.alibaba.druid.filter.FilterAdapter.dataSource_getConnection(FilterAdapter.java:2756)
at com.alibaba.druid.filter.FilterChainImpl.dataSource_connect(FilterChainImpl.java:5055)
at com.alibaba.druid.filter.stat.StatFilter.dataSource_getConnection(StatFilter.java:689)
at com.alibaba.druid.filter.FilterChainImpl.dataSource_connect(FilterChainImpl.java:5055)
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:1386)
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:1378)
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:99)
at io.seata.rm.datasource.DataSourceProxy.getConnection(DataSourceProxy.java:132)
at io.seata.rm.datasource.DataSourceProxy.getConnection(DataSourceProxy.java:42)
at org.hibernate.engine.jdbc.connections.internal.DatasourceConnectionProviderImpl.getConnection(DatasourceConnectionProviderImpl.java:122)
at org.hibernate.internal.NonContextualJdbcConnectionAccess.obtainConnection(NonContextualJdbcConnectionAccess.java:38)
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.acquireConnectionIfNeeded(LogicalConnectionManagedImpl.java:108)
at org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.getPhysicalConnection(LogicalConnectionManagedImpl.java:138)
at org.hibernate.engine.jdbc.internal.StatementPreparerImpl.connection(StatementPreparerImpl.java:50)
at org.hibernate.engine.jdbc.internal.StatementPreparerImpl$5.doPrepare(StatementPreparerImpl.java:149)
at org.hibernate.engine.jdbc.internal.StatementPreparerImpl$StatementPreparationTemplate.prepareStatement(StatementPreparerImpl.java:176)
at org.hibernate.engine.jdbc.internal.StatementPreparerImpl.prepareQueryStatement(StatementPreparerImpl.java:151)
at org.hibernate.loader.Loader.prepareQueryStatement(Loader.java:2141)
at org.hibernate.loader.Loader.executeQueryStatement(Loader.java:2078)
at org.hibernate.loader.Loader.executeQueryStatement(Loader.java:2056)
at org.hibernate.loader.Loader.doQuery(Loader.java:953)
at org.hibernate.loader.Loader.doQueryAndInitializeNonLazyCollections(Loader.java:350)
at org.hibernate.loader.Loader.doList(Loader.java:2887)
at org.hibernate.loader.Loader.doList(Loader.java:2869)
at org.hibernate.loader.Loader.listIgnoreQueryCache(Loader.java:2701)
at org.hibernate.loader.Loader.list(Loader.java:2696)
at org.hibernate.loader.custom.CustomLoader.list(CustomLoader.java:338)
at org.hibernate.internal.SessionImpl.listCustomQuery(SessionImpl.java:2142)
at org.hibernate.internal.AbstractSharedSessionContract.list(AbstractSharedSessionContract.java:1163)
at org.hibernate.query.internal.NativeQueryImpl.doList(NativeQueryImpl.java:173)
at org.hibernate.query.internal.AbstractProducedQuery.list(AbstractProducedQuery.java:1533)
at org.hibernate.query.Query.getResultList(Query.java:165)
at com.cysoft.core.dao.impl.BaseEntityRepositoryImpl.findPageByNativeSql(BaseEntityRepositoryImpl.java:139)
at com.cysoft.service.passage.dao.impl.PassageCarEntityDaoImpl.queryCarPassageRecord(PassageCarEntityDaoImpl.java:56)
at com.cysoft.service.passage.dao.impl.PassageCarEntityDaoImpl$FastClassBySpringCGLIB$36bd0931.invoke()
at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:771)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163) 所有 abandon connection 线程都是执行 BaseEntityRepositoryImpl 类似的方法导致,部分查询方法如下,
@Resource
private EntityManagerFactory entityManagerFactory;
@Override
public List基本上可以确定是由 entityManagerFactory.createEntityManager().createNativeQuery(sql) 方法导致,官网上了解到 EntityManager 的说明,大致意思是, “EntityManager如果不是由容器管理的情况下需要手动关闭” ,正好我们的 EntityManager 由 EntityManager 创建而来,属于编码式创建,而非容器管理,所以是使用 EntityManager 不当导致连接没有正常归还连接池。
后续我们通过jmeter模拟大量调用某些通过entityManager进行查库的请求,该问题必现。
可以通过下面两种方案解决
@Override
public List在 finally 中调用 EntityManager.close() 归还连接池连接
EntityManager 通过 Spring 管理注入,并且相应的类型上要 加@Transactional 注解
@Repository
@Transactional
public class UserRepositoryImpl implements UserRepository {
@PersistenceContext
EntityManager entityManager;注意注入 entityManager 不能使用 @Autowired ,由于 EntityManager 不是线程安全的,当多个请求进来的时候, spring 会创建多个线程,而 @PersistenceContext 就是用来为每个线程创建一个 EntityManager 的,而 @Autowired 就只创建了一个,为所有线程共用,有可能报错,下面是官网原话。
最后吐槽下, JPA 在国内使用比较少,遇到问题时尽量去查阅官方文档,最好能不用就不用吧,坑太多了。
欢迎各位在评论区留言评论探讨,给出你们的意见~
| 留言与评论(共有 0 条评论) “” |