Skip to content

Commit 2cd0744

Browse files
committed
Refine prompt, allow multiple assessments and add table to edit doc page
1 parent ea823c2 commit 2cd0744

File tree

11 files changed

+196
-38
lines changed

11 files changed

+196
-38
lines changed

app/assets/stylesheets/application.scss

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
@import "components/client-filters";
3434
@import "components/doc-help";
3535
@import "components/hub-form";
36+
@import "components/hub-doc-assessment";
3637
@import "components/illustrations";
3738
@import "components/progress-indicator";
3839
@import "components/marketing-comparison";
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#doc-assessments-table {
2+
margin: 10rem 5rem 5rem 5rem;
3+
4+
5+
table {
6+
border-collapse: collapse;
7+
font-size: 12px;
8+
}
9+
10+
tr.error {
11+
background-color: #FEF2E7;
12+
}
13+
14+
tr.fail {
15+
background-color: #FEE7E7;
16+
}
17+
18+
tr.pass {
19+
background-color: #E7FEEC;
20+
}
21+
22+
td, th {
23+
border: 1px solid black;
24+
padding: 10px 20px;
25+
}
26+
27+
th {
28+
font-weight: bold;
29+
text-transform: capitalize;
30+
}
31+
}

app/controllers/hub/documents_controller.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ def destroy
6666
redirect_back(fallback_location: hub_client_documents_path)
6767
end
6868

69+
def rerun_screener
70+
return head :forbidden if acts_like_production? || !current_user.admin?
71+
72+
DocScreenerJob.perform_now(@document.id)
73+
74+
redirect_back(fallback_location: edit_hub_client_document_path(client_id: @document.client.id, id: @document), notice: "Re-ran document screening.")
75+
end
76+
6977
private
7078

7179
def load_document_type_options

app/jobs/doc_screener_job.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
class DocScreenerJob < ApplicationJob
22
def perform(document_id)
3+
return if Flipper.enabled?(:disable_ai_doc_screener)
4+
35
document = Document.find(document_id)
46
return unless document.upload.attached?
57

@@ -9,7 +11,13 @@ def perform(document_id)
911
input_blob_id: document.upload.blob_id
1012
)
1113

12-
return if assessment.status == "complete"
14+
if assessment.status == "complete"
15+
assessment = DocAssessment.create!(
16+
document_id: document.id,
17+
prompt_version: BedrockDocScreener::PROMPT_VERSION,
18+
input_blob_id: document.upload.blob_id
19+
)
20+
end
1321

1422
assessment.update!(
1523
status: "processing",

app/models/doc_assessment.rb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
#
1717
# Indexes
1818
#
19-
# index_doc_assessments_on_doc_and_version_and_blob (document_id,prompt_version,input_blob_id) UNIQUE
20-
# index_doc_assessments_on_document_id (document_id)
19+
# index_doc_assessments_on_document_id (document_id)
2120
#
2221
# Foreign Keys
2322
#

app/models/document.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ class Document < ApplicationRecord
4444
belongs_to :contact_record, polymorphic: true, optional: true
4545
belongs_to :tax_return, optional: true
4646
belongs_to :uploaded_by, polymorphic: true, optional: true
47+
has_many :assessments, class_name: "DocAssessment", dependent: :destroy
48+
has_one :latest_assessment, -> { order(created_at: :desc) }, class_name: "DocAssessment"
4749

4850
validates_presence_of :client
4951
validates_presence_of :upload
@@ -88,6 +90,7 @@ class Document < ApplicationRecord
8890
end
8991
after_save_commit { SearchIndexer.refresh_filterable_properties([client_id]) }
9092
after_destroy_commit { SearchIndexer.refresh_filterable_properties([client_id]) }
93+
after_update_commit :rerun_screener_if_document_type_changed, if: :saved_change_to_document_type?
9194

9295
# has_one_attached needs to be called after defining any callbacks that access attachments, like
9396
# the HEIC conversion; see https://github.com/rails/rails/issues/37304
@@ -150,6 +153,12 @@ def uploaded_by_name_label
150153

151154
private
152155

156+
def rerun_screener_if_document_type_changed
157+
return unless upload.attached?
158+
159+
DocScreenerJob.perform_later(id)
160+
end
161+
153162
def tax_return_belongs_to_client
154163
errors.add(:tax_return_id, I18n.t("forms.errors.tax_return_belongs_to_client")) unless tax_return.blank? || tax_return.client == client
155164
end
Lines changed: 98 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,80 @@
11
require 'aws-sdk-bedrockruntime'
22
require 'json'
33
require 'base64'
4+
require "mini_magick"
5+
require "tempfile"
46

57
module BedrockDocScreener
68
MODEL_ID = 'us.anthropic.claude-haiku-4-5-20251001-v1:0'.freeze
79
REGION = 'us-east-1'.freeze
810
SUPPORTED_MEDIA_TYPES = %w[
911
image/png
1012
image/jpeg
13+
application/pdf
1114
].freeze
12-
# since the payload is `type: "image"` it won't work for application/pdf
13-
# can convert the pdf to image or use another bedrock flow for docs
1415

15-
# Update the prompt version if you are updating the prompt
1616
PROMPT_VERSION = "v1".freeze
17+
1718
def self.prompt_for(document_type:)
1819
<<~PROMPT
19-
You are validating an uploaded client document.
20-
21-
Document type: #{document_type}
20+
Clients are uploading documents and you need to verify the validity of the document using these rules:
21+
1) If the photo is a poor quality image (poorly lit, blurry, cropped & missing information, pixelated screen etc.)
22+
so much so that it renders the document illegible,
23+
then set reason="unreadable" and set verdict="fail".
24+
2) If the document does not fit any of the doc types in the available-doc-types list,
25+
then set reason="no_doc_type_match" and set verdict="fail".
26+
3) If it does not appear to match the stated doc type (in this case #{document_type})
27+
but does match another type in the available-doc-types list,
28+
then set reason="wrong_document_type", verdict="fail"
29+
and include the doc types that might match, by their label name, in the explanation field.
30+
4) If the document is expired, then set reason="expired" and verdict="fail".
31+
5) If the document is fake (for example if it is labeled as a 'sample'),
32+
then set reason="fake" and verdict="fail".
33+
6) If there is another reason that the document is not valid,
34+
then set reason="other" and verdict="fail".
35+
7) If the document seems to be a valid document, readable and the selected document type matches,
36+
then set reason="" and verdict="pass".
37+
8) "confidence" must be between 0.0 and 1.0.
38+
9) Do not include any keys other than "verdict", "reason", "explanation" and "confidence"
39+
10) verdict should only be "pass" or "fail"
40+
41+
available-doc-types: #{available_doc_types}
42+
43+
Selected document type: #{document_type}
2244
2345
Return ONLY valid JSON with this exact schema:
24-
2546
{
26-
"verdict": "pass" | "fail" | "needs_review",
27-
"reasons": [string, brief explanation],
47+
"verdict": "pass" | "fail",
48+
"reason": "unreadable" | "no_doc_type_match" | "wrong_document_type" | "expired" | "fake" | "other",
49+
"explanation": [Brief 1-2 sentence explanation of reason. Explain why valid/invalid.],
2850
"confidence": number between 0.0-1.0
2951
}
30-
31-
Rules:
32-
- If the document is unreadable, set verdict="needs_review" and include reason "unreadable".
33-
- If it does not appear to match the stated document type, verdict="fail" and include reason "wrong_document_type".
34-
- If it appears valid and readable, verdict="pass".
35-
- "confidence" must be between 0.0 and 1.0.
36-
- Do not include any keys other than verdict, reasons and confidence
3752
PROMPT
3853
end
3954

55+
def self.available_doc_types
56+
# matches @doc_type_options in document controller
57+
available_doc_types = [DocumentTypes::Identity, DocumentTypes::SsnItin] + (DocumentTypes::ALL_TYPES - DocumentTypes::IDENTITY_TYPES - DocumentTypes::SECONDARY_IDENTITY_TYPES)
58+
available_doc_types.map { |d| {key: d.key, label: d.label} }
59+
end
60+
4061
def self.screen_document!(document:)
4162
raise "Document has no upload attached" unless document.upload.attached?
4263

4364
media_type = document.upload.content_type
4465
raise "Unsupported media type: #{media_type}" unless SUPPORTED_MEDIA_TYPES.include?(media_type)
4566

46-
base64_data = Base64.strict_encode64(document.upload.download)
67+
input = if media_type == "application/pdf"
68+
pdf_to_png_base64(document.upload)
69+
else
70+
[{
71+
media_type: media_type,
72+
base64_data: Base64.strict_encode64(document.upload.download)
73+
}]
74+
end
4775

4876
body_hash = construct_bedrock_payload(
49-
base64_data: base64_data,
50-
media_type: media_type,
77+
images: input,
5178
user_prompt: prompt_for(document_type: document.document_type)
5279
)
5380

@@ -61,15 +88,17 @@ def self.screen_document!(document:)
6188
[result_json, raw_response_json]
6289
end
6390

64-
def self.construct_bedrock_payload(base64_data:, media_type:, user_prompt:)
91+
def self.construct_bedrock_payload(images:, user_prompt:)
6592
{
66-
anthropic_version: 'bedrock-2023-05-31',
93+
anthropic_version: "bedrock-2023-05-31",
6794
max_tokens: 250,
6895
messages: [{
69-
role: 'user',
96+
role: "user",
7097
content: [
71-
{ type: 'image', source: { type: 'base64', media_type: media_type, data: base64_data } },
72-
{ type: 'text', text: user_prompt }
98+
*images.map do |img|
99+
{ type: "image", source: { type: "base64", media_type: img[:media_type], data: img[:base64_data] } }
100+
end,
101+
{ type: "text", text: user_prompt }
73102
]
74103
}]
75104
}
@@ -89,17 +118,58 @@ def self.extract_text_from_response(response)
89118
Array(response['content'])
90119
.select { |content| content['type'] == 'text' }
91120
.map { |content| content['text'] }
92-
.join('\n')
121+
.join("\n")
93122
.strip
94123
end
95124

96125
def self.parse_strict_json!(text)
97-
cleaned = text.to_s.strip
126+
s = text.to_s
98127

99-
cleaned = cleaned.sub(/\A```(?:json)?\s*/i, "").sub(/\s*```\z/, "").strip
128+
blocks = s.scan(/```(?:json)?\s*(\{.*?\})\s*```/m)
129+
if blocks.any?
130+
json_str = blocks.last.first
131+
return JSON.parse(json_str)
132+
end
100133

134+
cleaned = s.strip
135+
cleaned = cleaned.sub(/\A```(?:json)?\s*/i, "").sub(/\s*```\z/, "").strip
101136
JSON.parse(cleaned)
102137
rescue JSON::ParserError => e
103-
raise "Model did not return valid JSON. Error=#{e.message}. Output=#{text.inspect}"
138+
raise "Bedrock did not return valid JSON. \n Error: #{e.message} \n Output: #{text.inspect}"
139+
end
140+
141+
142+
def self.pdf_to_png_base64(upload)
143+
images = []
144+
145+
Tempfile.create(["upload", ".pdf"]) do |pdf|
146+
pdf.binmode
147+
pdf.write(upload.download)
148+
pdf.flush
149+
150+
MiniMagick::Image.open(pdf.path).pages.each_with_index do |page, index|
151+
Tempfile.create(["pdf_page_#{index}", ".png"]) do |png|
152+
MiniMagick::Tool::Convert.new do |convert|
153+
convert.density(200)
154+
convert.quality(90)
155+
convert << "#{pdf.path}[#{index}]"
156+
convert << png.path
157+
end
158+
159+
data = File.binread(png.path)
160+
161+
images << {
162+
media_type: "image/png",
163+
base64_data: Base64.strict_encode64(data)
164+
}
165+
end
166+
end
167+
end
168+
169+
raise "pdf produced no pages" if images.empty?
170+
images
171+
rescue MiniMagick::Error, MiniMagick::Invalid => e
172+
raise "failed to convert pdf pages to images (perhaps minimagick or ghostscript issue). #{e.class}: #{e.message}"
104173
end
174+
105175
end

app/views/hub/documents/edit.html.erb

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
<%= render 'form', url: [:hub, @client, @document], method: "patch", file_upload_enabled: false %>
1010
</div>
1111
<div id="image-and-button-wrapper">
12-
<div class="item" id="image-container" >
12+
<div class="item" id="image-container">
1313
<% if @document.is_pdf? %>
14-
<embed src="<%= transient_storage_url(@document.upload.blob) %>" width="800px" height="100%" type="application/pdf" />
14+
<embed src="<%= transient_storage_url(@document.upload.blob) %>" width="800px" height="100%" type="application/pdf"/>
1515
<% else %>
1616
<%= image_tag transient_storage_url(@document.upload.blob), id: 'image', class: 'rotatable-image', data: { rotation: 0 } %>
1717
<% end %>
@@ -23,4 +23,39 @@
2323
</div>
2424

2525
</div>
26-
<% end %>
26+
27+
<% if !acts_like_production? && current_user.admin? %>
28+
<div id="doc-assessments-table">
29+
<%= button_to "Re-run screener",
30+
rerun_screener_hub_client_document_path(client_id: @document.client.id, id: @document.id),
31+
method: :post %>
32+
<br>
33+
<% if @document.assessments.present? %>
34+
<table class="index-table">
35+
<tr>
36+
<th>job complete?</th>
37+
<th>error?</th>
38+
<th>updated at</th>
39+
<th>verdict</th>
40+
<th>confidence</th>
41+
<th>reason</th>
42+
<th>explanation</th>
43+
</tr>
44+
<% @document.assessments.order('created_at DESC').each do |assessment| %>
45+
<% result = assessment&.result_json %>
46+
<% error = assessment&.error %>
47+
<tr class="<%= error.present? ? "error" : result["verdict"] %>">
48+
<td><%= assessment.status %></td>
49+
<td><%= "Error: #{JSON.pretty_generate(error)}" unless error.blank? %></td>
50+
<td><%= long_formatted_datetime(assessment.updated_at) %></td>
51+
<td><%= result["verdict"] %></td>
52+
<td><%= result["confidence"] %></td>
53+
<td><u><%= result["reason"] %></u></td>
54+
<td><%= result["explanation"] %></td>
55+
</tr>
56+
<% end %>
57+
</table>
58+
<% end %>
59+
</div>
60+
<% end %>
61+
<% end %>

config/routes.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ def scoped_navigation_routes(context, navigation)
286286
resources :documents do
287287
get "/archived", to: "documents#archived", on: :collection, as: :archived
288288
get "/confirm", to: "documents#confirm", on: :member, as: :confirm
289+
post :rerun_screener, on: :member
289290
end
290291
resources :notes, only: [:create, :index]
291292
resources :messages, only: [:index]

db/migrate/20251217230133_create_doc_assessments.rb

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,5 @@ def change
1616

1717
t.timestamps
1818
end
19-
20-
# prevents dupes for the same document/file/prompt-version
21-
add_index :doc_assessments, %i[document_id prompt_version input_blob_id], unique: true, name: "index_doc_assessments_on_doc_and_version_and_blob"
2219
end
2320
end

0 commit comments

Comments
 (0)