Skip to content

fix: DEV-2236: Stored XSS via SVG file #2273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 25, 2022
58 changes: 57 additions & 1 deletion label_studio/data_import/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license.
"""
import os.path
import time
import logging
import drf_yasg.openapi as openapi
Expand Down Expand Up @@ -449,7 +450,7 @@ class UploadedFileResponse(generics.RetrieveAPIView):
def get(self, *args, **kwargs):
request = self.request
filename = kwargs['filename']
file = settings.UPLOAD_DIR + ('/' if not settings.UPLOAD_DIR.endswith('/') else '') + filename
file = os.path.join(settings.UPLOAD_DIR, filename)
logger.debug(f'Fetch uploaded file by user {request.user} => {file}')
file_upload = FileUpload.objects.filter(file=file).last()

Expand All @@ -460,6 +461,61 @@ def get(self, *args, **kwargs):
if file.storage.exists(file.name):
content_type, encoding = mimetypes.guess_type(str(file.name))
content_type = content_type or 'application/octet-stream'
mime_type_filter = [
'image/svg+xml',
# 'application/xhtml+xml',
# 'text/html',
# 'text/javascript'
]
if content_type in mime_type_filter:
return RangedFileResponse(
request, self._allowlist_svg(file),
content_type=content_type)

return RangedFileResponse(request, file.open(mode='rb'), content_type=content_type)

else:
return Response(status=status.HTTP_404_NOT_FOUND)

def _allowlist_svg(self, dirty_file):
    """Return a sanitized copy of an SVG upload as an open temporary file.

    The file at ``dirty_file.path`` is read as text, passed through
    ``lxml.html.clean.Cleaner`` restricted to a small allowlist of basic
    SVG shape tags, and the cleaned markup is written to a temporary file.

    Args:
        dirty_file: Object exposing a ``path`` attribute that points at the
            stored SVG on the local filesystem.

    Returns:
        A binary temporary file positioned at offset 0 that holds the
        cleaned markup, or ``None`` when the source cannot be read or the
        temporary file cannot be written (the error is logged at debug
        level). The temporary file is removed automatically once it is
        closed.
    """
    from lxml.html import clean
    import tempfile

    # Only these element names survive cleaning.
    allow_tags = [
        'xml',
        'DOCTYPE',
        'svg',
        'circle',
        'ellipse',
        'line',
        'path',
        'polygon',
        'polyline',
        'rect',
    ]

    # NOTE(review): safe_attrs_only=False keeps attributes that are not on
    # lxml's safe list; attribute filtering is left to the Cleaner's other
    # (default) options -- confirm this is the intended policy.
    cleaner = clean.Cleaner(
        allow_tags=allow_tags,
        style=True,
        links=True,
        add_nofollow=False,
        page_structure=True,
        safe_attrs_only=False,
        remove_unknown_tags=False)

    try:
        # Context manager guarantees the source handle is released even if
        # reading fails part-way.
        with open(dirty_file.path, 'r') as fd_dirty:
            dirty_xml = fd_dirty.read()

        clean_xml = cleaner.clean_html(dirty_xml)
        # Presumably strips the <div> wrapper the cleaner puts around
        # fragment output -- verify against lxml's behaviour.
        clean_xml = clean_xml.replace('<div>', '').replace('</div>', '')

        # Default delete=True: the file disappears from disk when the handle
        # is closed or garbage-collected, so served SVGs do not pile up in
        # the temp directory.
        fd_clean = tempfile.NamedTemporaryFile()
        try:
            fd_clean.write(clean_xml.encode())
            fd_clean.seek(0)
            return fd_clean
        except OSError as error:
            logger.debug(f'Sanitize SVG file error {error}')
            # Do not leave a half-written temp file open behind us.
            fd_clean.close()
    except OSError as error:
        # OSError also covers IOError and FileNotFoundError on Python 3.
        logger.debug(f'open SVG file error {error}')

    return None
2 changes: 2 additions & 0 deletions label_studio/data_import/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def content(self):
body = getattr(self, '_file_body')
else:
body = self.file.read().decode('utf-8')
# TODO: ? filter out malicious/harmful content from file.
# e.g. bleach
setattr(self, '_file_body', body)
return body

Expand Down