Skip to content

Commit 2d5caf6

Browse files
committed
Fix: Refactor import functions and avoid async DB access to prevent SQLite database locks
1 parent 69d9e07 commit 2d5caf6

File tree

7 files changed

+263
-260
lines changed

7 files changed

+263
-260
lines changed

src/app/templates/app/profile.html

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,7 @@
4646
<form class="p-2 grid-item" method="POST" enctype="multipart/form-data">
4747
{% csrf_token %}
4848
<div class="form-title">The Movie Database</div>
49-
<div class="input-group">
50-
<input type="file" accept=".csv" class="form-control" aria-label="Upload" name="tmdb">
51-
<button class="btn btn-submit bg-dark rounded-end" type="submit"><i class="bi bi-check-lg"></i></button>
52-
</div>
49+
<button name="tmdb" class="btn btn-submit bg-dark rounded-end w-100" type="submit">TMDB Authentication</button>
5350
</form>
5451

5552
<form class="p-2 grid-item" method="POST" enctype="multipart/form-data">

src/app/tests/test_imports.py

Lines changed: 35 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from django.test import TestCase
22
from django.test import override_settings
33

4-
import csv
4+
from unittest.mock import patch
55
import shutil
66
import os
77

@@ -45,71 +45,44 @@ class ImportsTMDB(TestCase):
4545
def setUp(self):
4646
self.credentials = {"username": "test", "password": "12345"}
4747
self.user = User.objects.create_user(**self.credentials)
48-
49-
@override_settings(MEDIA_ROOT=("TMDB"))
50-
def test_import_tmdb(self):
5148
os.makedirs("TMDB")
52-
file_path = os.path.join("TMDB", "ratings.csv")
5349

54-
fields = [
55-
"TMDb ID",
56-
"IMDb ID",
57-
"Type",
58-
"Name",
59-
"Release Date",
60-
"Season Number",
61-
"Episode Number",
62-
"Rating",
63-
"Your Rating",
64-
"Date Rated",
65-
]
66-
data = [
67-
[
68-
"634649",
69-
"tt10872600",
70-
"movie",
71-
"Spider-Man: No Way Home",
72-
"2021-12-15T00:00:00Z",
73-
"",
74-
"",
75-
"8.022",
76-
"7",
77-
"2022-12-17T15:50:35Z",
78-
],
79-
[
80-
"1668",
81-
"tt0108778",
82-
"tv",
83-
"Friends",
84-
"1994-09-22T00:00:00Z",
85-
"",
86-
"",
87-
"8.463",
88-
"10",
89-
"2022-12-17T16:23:01Z",
50+
@override_settings(MEDIA_ROOT=("TMDB"))
51+
@patch("requests.get")
52+
def test_import_tmdb(self, mock_data):
53+
mock_data.return_value.json.return_value = {
54+
"total_pages": 1,
55+
"results": [
56+
{
57+
"id": 361743,
58+
"title": "Top Gun: Maverick",
59+
"rating": 7,
60+
"poster_path": "/62HCnUTziyWcpDaBO2i1DX17ljH.jpg",
61+
},
62+
{
63+
"id": 634649,
64+
"title": "Spider-Man: No Way Home",
65+
"rating": 7,
66+
"poster_path": "/uJYYizSuA9Y3DCs0qS4qWvHfZg4.jpg",
67+
},
9068
],
91-
]
92-
93-
with open(file_path, "w", newline="") as f:
94-
writer = csv.writer(f)
95-
writer.writerow(fields)
96-
writer.writerows(data)
69+
}
70+
fake_url = "https://api.themoviedb.org/3/account/1/rated/movies?api_key=12345&session_id=12345"
71+
images, bulk_add_media = tmdb.process_media_list(
72+
fake_url, "movie", "Completed", self.user, bulk_add_media=[]
73+
)
74+
Media.objects.bulk_create(bulk_add_media)
9775

98-
with open(file_path, "rb") as file:
99-
tmdb.import_tmdb(file, self.user)
100-
self.assertEqual(Media.objects.filter(user=self.user).count(), 2)
101-
self.assertEqual(
102-
Media.objects.filter(user=self.user, media_type="movie").count(), 1
103-
)
104-
self.assertEqual(
105-
Media.objects.filter(user=self.user, media_type="tv").count(), 1
106-
)
107-
self.assertEqual(
108-
Media.objects.get(user=self.user, media_id=634649).score == 7, True
109-
)
110-
self.assertEqual(
111-
Media.objects.get(user=self.user, media_id=1668).score == 10, True
112-
)
76+
self.assertEqual(Media.objects.filter(user=self.user).count(), 2)
77+
self.assertEqual(
78+
Media.objects.filter(user=self.user, media_type="movie").count(), 2
79+
)
80+
self.assertEqual(
81+
Media.objects.get(user=self.user, media_id=634649).score == 7, True
82+
)
83+
self.assertEqual(
84+
Media.objects.get(user=self.user, media_id=361743).score == 7, True
85+
)
11386

11487
def tearDown(self):
11588
shutil.rmtree("TMDB")

src/app/utils/helpers.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,14 @@
11
from django.conf import settings
22

33
import aiofiles
4+
import aiohttp
5+
import asyncio
46
import datetime
57
import requests
68
import pathlib
9+
import logging
10+
11+
logger = logging.getLogger(__name__)
712

813

914
def download_image(url, media_type):
@@ -22,6 +27,14 @@ def download_image(url, media_type):
2227
return filename
2328

2429

30+
async def images_downloader(images_to_download, media_type):
31+
async with aiohttp.ClientSession() as session:
32+
tasks = []
33+
for url in images_to_download:
34+
tasks.append(download_image_async(session, url, media_type))
35+
await asyncio.gather(*tasks)
36+
37+
2538
async def download_image_async(session, url, media_type):
2639
# rsplit is used to split the url at the last / and taking the last element
2740
# https://api-cdn.myanimelist.net/images/anime/12/76049.jpg -> 76049.jpg
@@ -36,8 +49,7 @@ async def download_image_async(session, url, media_type):
3649
f = await aiofiles.open(location, mode="wb")
3750
await f.write(await resp.read())
3851
await f.close()
39-
40-
return filename
52+
logger.info(f"Downloaded {filename}")
4153

4254

4355
def clean_data(request, metadata):

src/app/utils/imports/anilist.py

Lines changed: 47 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
1-
from aiohttp import ClientSession
2-
from asyncio import ensure_future, gather, run
3-
1+
import asyncio
42
import datetime
53
import requests
64
import logging
@@ -89,51 +87,51 @@ def import_anilist(username, user):
8987
return "User not found"
9088

9189
# error stores media titles that don't have a corresponding MAL ID
92-
bulk_add_media, error = run(anilist_get_media_list(query, error="", user=user))
93-
Media.objects.bulk_create(bulk_add_media)
90+
error = add_media_list(query, error="", user=user)
9491

9592
logger.info(
9693
f"Finished importing {username} from Anilist"
9794
)
9895
return error
9996

10097

101-
async def anilist_get_media_list(query, error, user):
102-
async with ClientSession() as session:
103-
task = []
104-
for media_type in query["data"]:
105-
for list in query["data"][media_type]["lists"]:
106-
if not list["isCustomList"]:
107-
for content in list["entries"]:
108-
if content["media"]["idMal"] is None:
109-
error += f"\n {content['media']['title']['userPreferred']}"
110-
logger.warning(
111-
f"{media_type.capitalize()}: {content['media']['title']['userPreferred']} has no MAL ID."
112-
)
113-
elif await Media.objects.filter(
114-
media_id=content["media"]["idMal"],
115-
media_type=media_type,
116-
user=user,
117-
).aexists():
118-
logger.warning(
119-
f"{media_type.capitalize()}: {content['media']['title']['userPreferred']} ({content['media']['idMal']}) already exists in database. Skipping..."
120-
)
121-
else:
122-
task.append(
123-
ensure_future(
124-
anilist_get_media(
125-
session, content, media_type, user
126-
)
127-
)
128-
)
129-
logger.info(
130-
f"{media_type.capitalize()}: {content['media']['title']['userPreferred']} ({content['media']['idMal']}) added to import list."
131-
)
132-
133-
return await gather(*task), error
134-
135-
136-
async def anilist_get_media(session, content, media_type, user):
98+
def add_media_list(query, error, user):
99+
bulk_add_media = []
100+
101+
for media_type in query["data"]:
102+
images_to_download = []
103+
for status_list in query["data"][media_type]["lists"]:
104+
if not status_list["isCustomList"]:
105+
for content in status_list["entries"]:
106+
if content["media"]["idMal"] is None:
107+
error += f"\n {content['media']['title']['userPreferred']}"
108+
logger.warning(
109+
f"{media_type.capitalize()}: {content['media']['title']['userPreferred']} has no MAL ID."
110+
)
111+
elif Media.objects.filter(
112+
media_id=content["media"]["idMal"],
113+
media_type=media_type,
114+
user=user,
115+
).exists():
116+
logger.warning(
117+
f"{media_type.capitalize()}: {content['media']['title']['userPreferred']} ({content['media']['idMal']}) already exists, skipping..."
118+
)
119+
else:
120+
images_to_download, bulk_add_media = process_media(
121+
content, media_type, user, images_to_download, bulk_add_media
122+
)
123+
124+
logger.info(
125+
f"{media_type.capitalize()}: {content['media']['title']['userPreferred']} ({content['media']['idMal']}) added to import list."
126+
)
127+
asyncio.run(helpers.images_downloader(images_to_download, media_type))
128+
129+
Media.objects.bulk_create(bulk_add_media)
130+
131+
return error
132+
133+
134+
def process_media(content, media_type, user, images_to_download, bulk_add_media):
137135
if content["status"] == "CURRENT":
138136
status = "Watching"
139137
else:
@@ -166,10 +164,13 @@ async def anilist_get_media(session, content, media_type, user):
166164
end_date=end_date,
167165
)
168166

169-
filename = await helpers.download_image_async(
170-
session, content["media"]["coverImage"]["large"], media_type
171-
)
167+
bulk_add_media.append(media)
168+
169+
image_url = content["media"]["coverImage"]["large"]
170+
images_to_download.append(image_url)
172171

173-
media.image = f"{filename}"
172+
# rsplit is used to split the url at the last / and taking the last element
173+
# https://api-cdn.myanimelist.net/images/anime/12/76049.jpg -> 76049.jpg
174+
media.image = f"{media_type}-{image_url.rsplit('/', 1)[-1]}"
174175

175-
return media
176+
return images_to_download, bulk_add_media

src/app/utils/imports/mal.py

Lines changed: 38 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
1-
from aiohttp import ClientSession
2-
from asyncio import ensure_future, gather, run
31
from decouple import config
42

3+
import asyncio
54
import datetime
65
import requests
76
import logging
@@ -34,6 +33,8 @@ def import_myanimelist(username, user):
3433
# Update the "paging" key with the new "next" URL (if any)
3534
animes["paging"] = next_data["paging"]
3635

36+
bulk_add_media = add_media_list(animes, "anime", user)
37+
3738
manga_url = f"https://api.myanimelist.net/v2/users/{username}/mangalist?fields=list_status&nsfw=true&limit=100"
3839
mangas = requests.get(manga_url, headers=header).json()
3940

@@ -46,43 +47,39 @@ def import_myanimelist(username, user):
4647
# Update the "paging" key with the new "next" URL (if any)
4748
mangas["paging"] = next_data["paging"]
4849

49-
series = {"anime": animes, "manga": mangas}
50+
bulk_add_media.extend(add_media_list(mangas, "manga", user))
5051

51-
bulk_add_media = run(myanilist_get_media_list(series, user))
5252
Media.objects.bulk_create(bulk_add_media)
53-
5453
logger.info(f"Finished importing {username} from MyAnimeList")
5554

5655
return True
5756

5857

59-
async def myanilist_get_media_list(series, user):
60-
async with ClientSession() as session:
61-
task = []
62-
for media_type, media_list in series.items():
63-
for content in media_list["data"]:
64-
if await Media.objects.filter(
65-
media_id=content["node"]["id"],
66-
media_type=media_type,
67-
user=user,
68-
).aexists():
69-
logger.warning(
70-
f"{media_type.capitalize()}: {content['node']['title']} ({content['node']['id']}) already exists in database. Skipping..."
71-
)
72-
else:
73-
task.append(
74-
ensure_future(
75-
myanimelist_get_media(session, content, media_type, user)
76-
)
77-
)
78-
logger.info(
79-
f"{media_type.capitalize()}: {content['node']['title']} ({content['node']['id']}) added to import list."
80-
)
81-
82-
return await gather(*task)
83-
84-
85-
async def myanimelist_get_media(session, content, media_type, user):
58+
def add_media_list(response, media_type, user):
59+
bulk_add_media = []
60+
images_to_download = []
61+
for content in response["data"]:
62+
if Media.objects.filter(
63+
media_id=content["node"]["id"],
64+
media_type=media_type,
65+
user=user,
66+
).exists():
67+
logger.warning(
68+
f"{media_type.capitalize()}: {content['node']['title']} ({content['node']['id']}) already exists, skipping..."
69+
)
70+
else:
71+
images_to_download, bulk_add_media = process_media(content, media_type, user, images_to_download, bulk_add_media)
72+
73+
logger.info(
74+
f"{media_type.capitalize()}: {content['node']['title']} ({content['node']['id']}) added to import list."
75+
)
76+
77+
asyncio.run(helpers.images_downloader(images_to_download, media_type))
78+
79+
return bulk_add_media
80+
81+
82+
def process_media(content, media_type, user, images_to_download, bulk_add_media):
8683
if content["list_status"]["status"] == "plan_to_watch":
8784
content["list_status"]["status"] = "Planning"
8885
elif content["list_status"]["status"] == "on_hold":
@@ -121,12 +118,16 @@ async def myanimelist_get_media(session, content, media_type, user):
121118
media.end_date = None
122119

123120
if "main_picture" in content["node"]:
124-
filename = await helpers.download_image_async(
125-
session, content["node"]["main_picture"]["large"], media_type
126-
)
127-
media.image = f"{filename}"
121+
image_url = content['node']['main_picture']['large']
122+
images_to_download.append(image_url)
123+
124+
# rsplit is used to split the url at the last / and taking the last element
125+
# https://api-cdn.myanimelist.net/images/anime/12/76049.jpg -> 76049.jpg
126+
media.image = f"{media_type}-{image_url.rsplit('/', 1)[-1]}"
128127

129128
else:
130129
media.image = "none.svg"
131130

132-
return media
131+
bulk_add_media.append(media)
132+
133+
return images_to_download, bulk_add_media

0 commit comments

Comments
 (0)