Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit c67c9fc

Browse files
authored
Take SwiftProtobuf dependency out from ModelSupport (#694)
1 parent 7e93db5 commit c67c9fc

File tree

7 files changed

+19
-15
lines changed

7 files changed

+19
-15
lines changed

Models/Text/BERT/BERT.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,22 @@ import Checkpoints
1616
import Datasets
1717
import Foundation
1818
import ModelSupport
19+
import SwiftProtobuf
1920
import TensorFlow
2021

22+
extension Vocabulary {
23+
public init(fromSentencePieceModel fileURL: URL) throws {
24+
self.init(
25+
tokensToIds: [String: Int](
26+
(try Sentencepiece_ModelProto(serializedData: Data(contentsOf: fileURL)))
27+
.pieces
28+
.map { $0.piece.replacingOccurrences(of: "▁", with: "##") }
29+
.map { $0 == "<unk>" ? "[UNK]" : $0 }
30+
.enumerated().map { ($0.element, $0.offset) },
31+
uniquingKeysWith: { (v1, v2) in max(v1, v2) }))
32+
}
33+
}
34+
2135
/// Represents a type that can contribute to the regularization term when training models.
2236
public protocol Regularizable: Differentiable {
2337
/// The contribution of this term to the regularization term. This should be set to

Models/Text/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_library(TextModels
1010
GPT2/TransformerLM.swift
1111
GPT2/Operators.swift
1212
GPT2/PythonCheckpointReader.swift
13+
SentencePiece/SentencePieceModel.pb.swift
1314
WordSeg/Lattice.swift
1415
WordSeg/Model.swift
1516
WordSeg/SemiRing.swift)
@@ -18,7 +19,8 @@ set_target_properties(TextModels PROPERTIES
1819
target_compile_options(TextModels PRIVATE
1920
$<$<BOOL:${BUILD_TESTING}>:-enable-testing>)
2021
target_link_libraries(TextModels PUBLIC
21-
Datasets)
22+
Datasets
23+
SwiftProtobuf)
2224

2325
install(TARGETS TextModels
2426
ARCHIVE DESTINATION lib/swift/$<LOWER_CASE:${CMAKE_SYSTEM_NAME}>
File renamed without changes.
File renamed without changes.

Package.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ let package = Package(
3333
.target(name: "Datasets", dependencies: ["ModelSupport"], path: "Datasets"),
3434
.target(name: "STBImage", path: "Support/STBImage"),
3535
.target(
36-
name: "ModelSupport", dependencies: ["SwiftProtobuf", "STBImage"], path: "Support",
36+
name: "ModelSupport", dependencies: ["STBImage"], path: "Support",
3737
exclude: ["STBImage"]),
3838
.target(name: "ImageClassificationModels", path: "Models/ImageClassification"),
3939
.target(name: "VideoClassificationModels", path: "Models/Spatiotemporal"),
40-
.target(name: "TextModels", dependencies: ["Checkpoints", "Datasets"], path: "Models/Text"),
40+
.target(name: "TextModels", dependencies: ["Checkpoints", "Datasets", "SwiftProtobuf"], path: "Models/Text"),
4141
.target(name: "RecommendationModels", path: "Models/Recommendation"),
4242
.target(name: "TrainingLoop", dependencies: ["ModelSupport"], path: "TrainingLoop"),
4343
.target(

Support/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ add_library(ModelSupport
1313
AnyLayer.swift
1414
AnyLayerTangentVector.swift
1515
Text/BytePairEncoder.swift
16-
Text/SentencePiece/SentencePieceModel.pb.swift
1716
Text/TextBatch.swift
1817
Text/Tokenization.swift
1918
Text/WordSeg/Alphabet.swift

Support/Text/Tokenization.swift

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,6 @@ extension Vocabulary {
8888
}
8989

9090
extension Vocabulary {
91-
public init(fromSentencePieceModel fileURL: URL) throws {
92-
self.init(
93-
tokensToIds: [String: Int](
94-
(try Sentencepiece_ModelProto(serializedData: Data(contentsOf: fileURL)))
95-
.pieces
96-
.map { $0.piece.replacingOccurrences(of: "▁", with: "##") }
97-
.map { $0 == "<unk>" ? "[UNK]" : $0 }
98-
.enumerated().map { ($0.element, $0.offset) },
99-
uniquingKeysWith: { (v1, v2) in max(v1, v2) }))
100-
}
101-
10291
public init(fromJSONFile fileURL: URL) throws {
10392
let json = try String(contentsOfFile: fileURL.path)
10493
let tokensToIds = try JSONDecoder().decode(

0 commit comments

Comments
 (0)