@@ -35,7 +35,7 @@ import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils}
3535import org .apache .spark .sql .delta .sources ._
3636import org .apache .spark .sql .delta .storage .LogStoreProvider
3737import org .apache .spark .sql .delta .util .FileNames
38- import com .google .common .cache .{CacheBuilder , RemovalNotification }
38+ import com .google .common .cache .{Cache , CacheBuilder , RemovalNotification }
3939import org .apache .hadoop .conf .Configuration
4040import org .apache .hadoop .fs .{FileStatus , FileSystem , Path }
4141
@@ -615,21 +615,42 @@ object DeltaLog extends DeltaLogging {
615615 * We create only a single [[DeltaLog ]] for any given `DeltaLogCacheKey` to avoid wasted work
616616 * in reconstructing the log.
617617 */
618- private val deltaLogCache = {
619- val builder = CacheBuilder .newBuilder()
620- .expireAfterAccess(60 , TimeUnit .MINUTES )
621- .removalListener((removalNotification : RemovalNotification [DeltaLogCacheKey , DeltaLog ]) => {
622- val log = removalNotification.getValue
623- // TODO: We should use ref-counting to uncache snapshots instead of a manual timed op
624- try log.unsafeVolatileSnapshot.uncache() catch {
625- case _ : java.lang.NullPointerException =>
626- // Various layers will throw null pointer if the RDD is already gone.
627- }
628- })
629- sys.props.get(" delta.log.cacheSize" )
630- .flatMap(v => Try (v.toLong).toOption)
631- .foreach(builder.maximumSize)
632- builder.build[DeltaLogCacheKey , DeltaLog ]()
618+ type CacheKey = (Path , Map [String , String ])
619+ private [delta] def getOrCreateCache (conf : SQLConf ):
620+ Cache [CacheKey , DeltaLog ] = synchronized {
621+ deltaLogCache match {
622+ case Some (c) => c
623+ case None =>
624+ val builder = createCacheBuilder(conf)
625+ .removalListener(
626+ (removalNotification : RemovalNotification [DeltaLogCacheKey , DeltaLog ]) => {
627+ val log = removalNotification.getValue
628+ // TODO: We should use ref-counting to uncache snapshots instead of a manual timed op
629+ try log.unsafeVolatileSnapshot.uncache() catch {
630+ case _ : java.lang.NullPointerException =>
631+ // Various layers will throw null pointer if the RDD is already gone.
632+ }
633+ })
634+ deltaLogCache = Some (builder.build[CacheKey , DeltaLog ]())
635+ deltaLogCache.get
636+ }
637+ }
638+
639+ private var deltaLogCache : Option [Cache [CacheKey , DeltaLog ]] = None
640+
641+ /**
642+ * Helper to create delta log caches
643+ */
644+ private def createCacheBuilder (conf : SQLConf ): CacheBuilder [AnyRef , AnyRef ] = {
645+ val cacheRetention = conf.getConf(DeltaSQLConf .DELTA_LOG_CACHE_RETENTION_MINUTES )
646+ val cacheSize = conf
647+ .getConf(DeltaSQLConf .DELTA_LOG_CACHE_SIZE )
648+ .max(sys.props.get(" delta.log.cacheSize" ).map(_.toLong).getOrElse(0L ))
649+
650+ CacheBuilder
651+ .newBuilder()
652+ .expireAfterAccess(cacheRetention, TimeUnit .MINUTES )
653+ .maximumSize(cacheSize)
633654 }
634655
635656
@@ -787,7 +808,8 @@ object DeltaLog extends DeltaLogging {
787808 // - Different `authority` (e.g., different user tokens in the path)
788809 // - Different mount point.
789810 try {
790- deltaLogCache.get(path -> fileSystemOptions, () => {
811+ getOrCreateCache(spark.sessionState.conf)
812+ .get(path -> fileSystemOptions, () => {
791813 createDeltaLog()
792814 }
793815 )
@@ -801,7 +823,7 @@ object DeltaLog extends DeltaLogging {
801823 if (Option (deltaLog.sparkContext.get).map(_.isStopped).getOrElse(true )) {
802824 // Invalid the cached `DeltaLog` and create a new one because the `SparkContext` of the cached
803825 // `DeltaLog` has been stopped.
804- deltaLogCache .invalidate(path -> fileSystemOptions)
826+ getOrCreateCache(spark.sessionState.conf) .invalidate(path -> fileSystemOptions)
805827 getDeltaLogFromCache()
806828 } else {
807829 deltaLog
@@ -819,6 +841,7 @@ object DeltaLog extends DeltaLogging {
819841 // scalastyle:on deltahadoopconfiguration
820842 val path = fs.makeQualified(rawPath)
821843
844+ val deltaLogCache = getOrCreateCache(spark.sessionState.conf)
822845 if (spark.sessionState.conf.getConf(
823846 DeltaSQLConf .LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS )) {
824847 // We rely on the fact that accessing the key set doesn't modify the entry access time. See
@@ -841,12 +864,19 @@ object DeltaLog extends DeltaLogging {
841864 }
842865
843866 def clearCache (): Unit = {
844- deltaLogCache.invalidateAll()
867+ deltaLogCache.foreach(_.invalidateAll())
868+ }
869+
870+ /** Unset the caches. Exposing for testing */
871+ private [delta] def unsetCache (): Unit = {
872+ synchronized {
873+ deltaLogCache = None
874+ }
845875 }
846876
847877 /** Return the number of cached `DeltaLog`s. Exposing for testing */
848878 private [delta] def cacheSize : Long = {
849- deltaLogCache.size()
879+ deltaLogCache.map(_. size()).getOrElse( 0L )
850880 }
851881
852882 /**