Skip to content

Commit 8675f95

Browse files
lsloan, ssciolla, jonespm
committed
upgrade Python to v3.10 (iss. tl-its-umich-edu#1406) (tl-its-umich-edu#1426)
* tl-its-umich-edu#1406 - spelling correction & clean-up
  Cleaned up trailing spaces on a few lines.
* tl-its-umich-edu#1406 - upgrade to Python 3.10
  Resolves vulnerability CVE-2015-20107.
* tl-its-umich-edu#1406 - upgrade `pandas` for Python 3.10
  Upgrade `pandas` and related modules to work with Python 3.10.
* tl-its-umich-edu#1406 - fix all warehouse connections
  Derive PostgreSQL connect string for data warehouse DB from Django connections, then create a single engine for all queries that will use that DB.
* Create utility function for creating mysql and postgres engines; apply to views
* Remove other database conn prep
* Reuse create_sqlalchemy_engine in data_validation
* Remove unused variable
* Make a couple minor modifications to db_util
* Make one read_sql call one line
* Remove unused import
* Update numpy, pangres; change mypy version
* Remove type parameter, use Django ENGINE
* Reverting change to validate_udw_vs_udp since it already created an engine

Co-authored-by: Sam Sciolla <[email protected]>
Co-authored-by: Code Hugger (Matthew Jones) <[email protected]>
1 parent 0747812 commit 8675f95

File tree

8 files changed

+57
-44
lines changed

8 files changed

+57
-44
lines changed

dashboard/common/db_util.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,44 @@
11
# Some utility functions used by other classes in this project
22
import logging
33
from datetime import datetime
4-
from typing import Dict, List, TypedDict, Union
4+
from typing import Dict, List, Literal, TypedDict, Union
5+
from urllib.parse import quote_plus
56

6-
from dateutil.parser import parse
77
import django
8+
from sqlalchemy import create_engine
9+
from sqlalchemy.engine import Engine
10+
from dateutil.parser import parse
811
from django.conf import settings
12+
from django.contrib.auth.models import User as DjangoUser
913
from django_cron.models import CronJobLog
1014

1115
from dashboard.models import Course, User
1216

13-
from django.contrib.auth.models import User as DjangoUser
1417

1518
logger = logging.getLogger(__name__)
1619

20+
BACKENDS_PATH = 'django.db.backends.'
21+
22+
23+
class DjangoDBParams(TypedDict):
    """
    Shape of one entry of Django's ``DATABASES`` setting, limited to the keys
    needed to build a database connection URL.
    """

    # Dotted path of the Django database backend; only these two are typed as supported.
    ENGINE: Literal['django.db.backends.mysql', 'django.db.backends.postgresql']
    # Name of the database to connect to.
    NAME: str
    # Credentials used to authenticate against the database server.
    USER: str
    PASSWORD: str
    # Network location of the database server.
    HOST: str
    # Django documents PORT as a string (default ''), but integer values are
    # also commonly supplied, so both are accepted here.
    PORT: Union[int, str]
30+
31+
32+
def create_sqlalchemy_engine(db_params: DjangoDBParams) -> Engine:
    """
    Create a SQLAlchemy engine from one entry of Django's ``DATABASES`` setting.

    The connection URL is assembled with ``URL.create`` rather than string
    formatting, so the user name and password are passed through as-is and
    need no manual URL escaping (previously only the password was escaped).

    :param db_params: Django database parameters (ENGINE, NAME, USER,
        PASSWORD, HOST, PORT). The mapping is not modified.
    :return: An engine using the ``mysql+mysqldb`` driver with the
        ``utf8mb4`` charset when ENGINE is Django's MySQL backend; otherwise
        a ``postgresql`` engine.
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import block.
    from sqlalchemy.engine import URL

    is_mysql = db_params['ENGINE'] == (BACKENDS_PATH + 'mysql')

    # Django uses an empty string to mean "use the default port", while
    # URL.create() expects an integer or None.
    raw_port = db_params['PORT']
    port = int(raw_port) if raw_port not in (None, '') else None

    connection_url = URL.create(
        drivername='mysql+mysqldb' if is_mysql else 'postgresql',
        username=db_params['USER'],
        password=db_params['PASSWORD'],
        host=db_params['HOST'],
        port=port,
        database=db_params['NAME'],
        # The charset option is only sent to MySQL, matching the previous URL.
        query={'charset': 'utf8mb4'} if is_mysql else {},
    )
    return create_engine(connection_url)
41+
1742

1843
def canvas_id_to_incremented_id(canvas_id):
1944
try:

dashboard/cron.py

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import logging
33
from collections import namedtuple
44
from typing import Any, Dict, List, Union
5-
from urllib.parse import quote_plus
65

76
import hjson
87
import pandas as pd
@@ -14,7 +13,7 @@
1413
from django.db.models import QuerySet
1514
from django_cron import CronJobBase, Schedule
1615
from google.cloud import bigquery
17-
from sqlalchemy import create_engine, types
16+
from sqlalchemy import types
1817
from sqlalchemy.engine import ResultProxy
1918

2019
from dashboard.common import db_util, utils
@@ -23,20 +22,8 @@
2322

2423
logger = logging.getLogger(__name__)
2524

26-
db_name = settings.DATABASES['default']['NAME']
27-
db_user = settings.DATABASES['default']['USER']
28-
db_password = settings.DATABASES['default']['PASSWORD']
29-
db_host = settings.DATABASES['default']['HOST']
30-
db_port = settings.DATABASES['default']['PORT']
31-
logger.debug("db-name:" + db_name)
32-
logger.debug("db-user:" + db_user)
33-
34-
engine = create_engine("mysql+mysqldb://{user}:{password}@{host}:{port}/{db}?charset=utf8mb4"
35-
.format(db=db_name, # your mysql database name
36-
user=db_user, # your mysql user for the database
37-
password=quote_plus(db_password), # password for user
38-
host=db_host,
39-
port=db_port))
25+
engine = db_util.create_sqlalchemy_engine(settings.DATABASES['default'])
26+
data_warehouse_engine = db_util.create_sqlalchemy_engine(settings.DATABASES['DATA_WAREHOUSE'])
4027

4128
# Set up queries array from configuration file
4229
CRON_QUERY_FILE = settings.CRON_QUERY_FILE
@@ -59,7 +46,7 @@ def split_list(a_list: list, size: int = 20):
5946

6047

6148
def util_function(data_warehouse_course_id, sql_string, mysql_table, table_identifier=None, param_object=None):
62-
df = pd.read_sql(sql_string, conns['DATA_WAREHOUSE'], params=param_object)
49+
df = pd.read_sql(sql_string, data_warehouse_engine, params=param_object)
6350
logger.debug(df)
6451

6552
# Sql returns boolean value so grouping course info along with it so that this could be stored in the DB table.
@@ -149,7 +136,7 @@ def verify_course_ids(self):
149136
course_sql = queries['course'].format(course_id=course_id)
150137
logger.debug(course_sql)
151138

152-
course_df = pd.read_sql(course_sql, conns['DATA_WAREHOUSE'])
139+
course_df = pd.read_sql(course_sql, data_warehouse_engine)
153140
logger.debug(course_df)
154141

155142
# error out when course id is invalid, otherwise add DataFrame to list
@@ -227,7 +214,7 @@ def update_canvas_resource(self):
227214
course_ids = list(map(str, self.valid_locked_course_ids))
228215
file_sql = queries['resource']
229216
logger.debug(file_sql)
230-
df_attach = pd.read_sql(file_sql, conns['DATA_WAREHOUSE'], params={'course_ids': tuple(course_ids)})
217+
df_attach = pd.read_sql(file_sql, data_warehouse_engine, params={'course_ids': tuple(course_ids)})
231218
logger.debug(df_attach)
232219
# Update these back again based on the dataframe
233220
# Remove any rows where file_state is not available!
@@ -421,7 +408,7 @@ def update_resource_access(self):
421408
# First, update resource table
422409
try:
423410
dtype = {'resource_id': types.VARCHAR(255)}
424-
pangres.upsert(engine=engine, df=resource_df,
411+
pangres.upsert(con=engine, df=resource_df,
425412
table_name='resource', if_row_exists='update',
426413
create_schema=False, add_new_columns=False,
427414
dtype=dtype)
@@ -540,7 +527,7 @@ def update_term(self) -> str:
540527

541528
term_sql: str = queries['term']
542529
logger.debug(term_sql)
543-
warehouse_term_df: pd.DataFrame = pd.read_sql(term_sql, conns['DATA_WAREHOUSE'])
530+
warehouse_term_df: pd.DataFrame = pd.read_sql(term_sql, data_warehouse_engine)
544531

545532
existing_terms_ids: List[int] = [term.id for term in list(AcademicTerms.objects.all())]
546533
new_term_ids: List[int] = [int(id) for id in warehouse_term_df['id'].to_list() if id not in existing_terms_ids]

dashboard/views.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from django.conf import settings
1212
from django.contrib import auth
1313
from django.core.exceptions import ObjectDoesNotExist
14-
from django.db import connection as conn
1514
from django.forms.models import model_to_dict
1615
from django.http import HttpResponse, HttpResponseForbidden, JsonResponse
1716
from django.shortcuts import redirect, render
@@ -20,7 +19,7 @@
2019
from rules.contrib.views import permission_required, objectgetter
2120

2221
from dashboard.common import utils
23-
from dashboard.common.db_util import canvas_id_to_incremented_id
22+
from dashboard.common.db_util import canvas_id_to_incremented_id, create_sqlalchemy_engine
2423
from dashboard.event_logs_types.event_logs_types import EventLogTypes
2524
from dashboard.models import Course, CourseViewOption, Resource, UserDefaultSelection, User
2625
from dashboard.settings import COURSES_ENABLED, RESOURCE_VALUES, RESOURCE_VALUES_MAP, \
@@ -44,6 +43,8 @@
4443

4544
BinningGrade = namedtuple('BinningGrade', ['value', 'index', 'binning_all'])
4645

46+
app_engine = create_sqlalchemy_engine(settings.DATABASES['default'])
47+
4748

4849
def gpa_map(grade):
4950
if grade is None:
@@ -285,7 +286,7 @@ def resource_access_within_week(request, course_id=0):
285286
elif (grade == GRADE_C):
286287
total_number_student_sql += " and current_grade >= 70 and current_grade < 80"
287288

288-
total_number_student_df = pd.read_sql(total_number_student_sql, conn, params={
289+
total_number_student_df = pd.read_sql(total_number_student_sql, app_engine, params={
289290
"course_id": course_id,
290291
"enrollment_type": "StudentEnrollment"
291292
})
@@ -323,7 +324,7 @@ def resource_access_within_week(request, course_id=0):
323324
endTimeString = end.strftime('%Y%m%d') + "000000"
324325
logger.debug(sqlString)
325326
logger.debug("start time=" + startTimeString + " end_time=" + endTimeString)
326-
df = pd.read_sql(sqlString, conn, params={
327+
df = pd.read_sql(sqlString, app_engine, params={
327328
"start_time": startTimeString,
328329
"end_time": endTimeString,
329330
"course_id": course_id,
@@ -387,7 +388,7 @@ def resource_access_within_week(request, course_id=0):
387388
logger.debug(selfSqlString)
388389
logger.debug("current_user=" + current_user)
389390

390-
selfDf= pd.read_sql(selfSqlString, conn, params={"current_user":current_user, "course_id": course_id})
391+
selfDf= pd.read_sql(selfSqlString, app_engine, params={"current_user":current_user, "course_id": course_id})
391392
output_df = output_df.join(selfDf.set_index('resource_id_type'), on=['resource_id_type'], how='left')
392393
output_df["total_percent"] = output_df.apply(lambda row: row[GRADE_A] + row[GRADE_B] + row[GRADE_C] + row[GRADE_LOW] + row.NO_GRADE, axis=1)
393394

@@ -448,7 +449,7 @@ def grade_distribution(request, course_id=0):
448449
(select current_grade from user where sis_name=%(current_user)s and course_id=%(course_id)s) as current_user_grade
449450
from user where course_id=%(course_id)s and enrollment_type=%(enrollment_type)s
450451
"""
451-
df = pd.read_sql(grade_score_sql, conn, params={
452+
df = pd.read_sql(grade_score_sql, app_engine, params={
452453
'current_user': current_user,
453454
'course_id': course_id,
454455
'enrollment_type': 'StudentEnrollment'
@@ -663,7 +664,7 @@ def get_course_assignments(course_id):
663664
(select distinct assignment_id,avg_score from submission where course_id=%(course_id)s) as sub on sub.assignment_id = assign.assignment_id
664665
"""
665666

666-
assignments_in_course = pd.read_sql(sql,conn,params={'course_id': course_id}, parse_dates={'due_date': '%Y-%m-%d'})
667+
assignments_in_course = pd.read_sql(sql, app_engine, params={'course_id': course_id}, parse_dates={'due_date': '%Y-%m-%d'})
667668
# No assignments found in the course
668669
if assignments_in_course.empty or (assignments_in_course['assignment_id'] == 0).all():
669670
logger.info('The course %s don\'t seems to have assignment data' % course_id)
@@ -697,7 +698,7 @@ def get_course_assignments(course_id):
697698
def get_user_assignment_submission(current_user,assignments_in_course_df, course_id):
698699
sql = "select assignment_id, submitted_at, score, graded_date from submission where " \
699700
"user_id=(select user_id from user where sis_name = %(current_user)s and course_id = %(course_id)s ) and course_id = %(course_id)s"
700-
assignment_submissions = pd.read_sql(sql, conn, params={'course_id': course_id, "current_user": current_user})
701+
assignment_submissions = pd.read_sql(sql, app_engine, params={'course_id': course_id, "current_user": current_user})
701702
if assignment_submissions.empty:
702703
logger.info('The user %s seems to be a not student in the course.' % current_user)
703704
# manually adding the columns for display in UI
@@ -772,7 +773,7 @@ def find_current_week(row):
772773

773774
def is_weight_considered(course_id):
774775
url = "select consider_weight from assignment_weight_consideration where course_id=%(course_id)s"
775-
df = pd.read_sql(url, conn, params={"course_id": course_id})
776+
df = pd.read_sql(url, app_engine, params={"course_id": course_id})
776777
value = df['consider_weight'].iloc[0]
777778
return value
778779

dockerfiles/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ RUN npm prune --production && \
3030
find node_modules -type d -name "examples" -print0 | xargs -0 rm -rf
3131

3232
# FROM directive instructing base image to build upon
33-
FROM python:3.8-slim AS app
33+
FROM python:3.10-slim AS app
3434

3535
# EXPOSE port 5000 to allow communication to/from server
3636
EXPOSE 5000

dockerfiles/Dockerfile.openshift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ RUN npm prune --production && \
2929
find node_modules -type d -name "examples" -print0 | xargs -0 rm -rf
3030

3131
# FROM directive instructing base image to build upon
32-
FROM docker-registry.default.svc:5000/openshift/python:3.8-slim AS app
32+
FROM docker-registry.default.svc:5000/openshift/python:3.10-slim AS app
3333

3434
# EXPOSE port 5000 to allow communication to/from server
3535
EXPOSE 5000

mypy.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
[mypy]
2-
python_version = 3.8
2+
python_version = 3.10

requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ django-filter==2.4.0
2525
rules==3.0
2626

2727
# These should be okay to update minors
28-
numpy==1.22.0
29-
pandas==1.3.1
30-
pangres==2.3.1
28+
numpy==1.23.3
29+
pandas==1.4.4
30+
pangres==4.1.2
3131

3232
SQLAlchemy==1.4.22
3333
psycopg2==2.9.1
3434
mysqlclient==2.0.3
35-
google-cloud-bigquery[pandas]==2.24.0
35+
google-cloud-bigquery[pandas]==3.3.2
3636

3737
debugpy==1.4.1
3838
jsonschema==3.2.0

start.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
#!/bin/bash
1+
#!/bin/bash
22

3-
# Case insenstive match
3+
# Case insensitive match
44
shopt -s nocaseglob
55

66
if [ -z "${ENV_FILE}" ]; then
@@ -48,7 +48,7 @@ else
4848
fi
4949

5050
echo "Waiting for DB"
51-
while ! nc -z "${MYSQL_HOST}" "${MYSQL_PORT}"; do
51+
while ! nc -z "${MYSQL_HOST}" "${MYSQL_PORT}"; do
5252
sleep 1 # wait 1 second before check again
5353
done
5454

@@ -89,7 +89,7 @@ if [ "${IS_CRON_POD:-"false"}" == "false" ]; then
8989
--workers="${GUNICORN_WORKERS}" \
9090
--timeout="${GUNICORN_TIMEOUT}" \
9191
${GUNICORN_RELOAD}
92-
92+
9393
else
9494
if [ -z "${CRONTAB_SCHEDULE}" ]; then
9595
echo "CRONTAB_SCHEDULE environment variable not set, crontab cannot be started. Please set this to a crontab acceptable format."

0 commit comments

Comments
 (0)