Skip to content

Commit e1b050c

Browse files
authored
Merge pull request #599 from Jordan-M-Young/feat/pg-semicolon-support
Feat/pg semicolon support
2 parents 620c25a + f20a2c3 commit e1b050c

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

connectorx-python/connectorx/__init__.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ def read_sql(
206206
"""
207207
if isinstance(query, list) and len(query) == 1:
208208
query = query[0]
209+
query = remove_ending_semicolon(query)
210+
209211

210212
if isinstance(conn, dict):
211213
assert partition_on is None and isinstance(
@@ -214,6 +216,9 @@ def read_sql(
214216
assert (
215217
protocol is None
216218
), "Federated query does not support specifying protocol for now"
219+
220+
query = remove_ending_semicolon(query)
221+
217222
result = _read_sql2(query, conn)
218223
df = reconstruct_arrow(result)
219224
if return_type == "pandas":
@@ -232,6 +237,9 @@ def read_sql(
232237
return df
233238

234239
if isinstance(query, str):
240+
241+
query = remove_ending_semicolon(query)
242+
235243
if partition_on is None:
236244
queries = [query]
237245
partition_query = None
@@ -245,7 +253,7 @@ def read_sql(
245253
}
246254
queries = None
247255
elif isinstance(query, list):
248-
queries = query
256+
queries = [remove_ending_semicolon(subquery) for subquery in query]
249257
partition_query = None
250258

251259
if partition_on is not None:
@@ -377,3 +385,11 @@ def reconstruct_pandas(df_infos: Dict[str, Any]):
377385
)
378386
df = pd.DataFrame(block_manager)
379387
return df
388+
389+
390+
def remove_ending_semicolon(query: str) -> str:
391+
if query[-1] == ';':
392+
query= list(query)
393+
query.pop(-1)
394+
query = "".join(query)
395+
return query

connectorx-python/connectorx/tests/test_postgres.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,4 +1138,42 @@ def test_postgres_name_type(postgres_url: str) -> None:
11381138
"test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]),
11391139
},
11401140
)
1141+
assert_frame_equal(df, expected, check_names=True)
1142+
1143+
1144+
1145+
def test_postgres_semicolon_support_str_query(postgres_url: str) -> None:
1146+
query = "SELECT test_name FROM test_types;"
1147+
df = read_sql(postgres_url, query)
1148+
expected = pd.DataFrame(
1149+
data={
1150+
"test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]),
1151+
},
1152+
)
1153+
assert_frame_equal(df, expected, check_names=True)
1154+
1155+
1156+
def test_postgres_semicolon_list_queries(postgres_url: str) -> None:
1157+
queries = [
1158+
"SELECT * FROM test_table WHERE test_int < 2;",
1159+
"SELECT * FROM test_table WHERE test_int >= 2;",
1160+
]
1161+
1162+
df = read_sql(postgres_url, query=queries)
1163+
1164+
expected = pd.DataFrame(
1165+
index=range(6),
1166+
data={
1167+
"test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"),
1168+
"test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"),
1169+
"test_str": pd.Series(
1170+
["a", "str1", "str2", "b", "c", None], dtype="object"
1171+
),
1172+
"test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"),
1173+
"test_bool": pd.Series(
1174+
[None, True, False, False, None, True], dtype="boolean"
1175+
),
1176+
},
1177+
)
1178+
df.sort_values(by="test_int", inplace=True, ignore_index=True)
11411179
assert_frame_equal(df, expected, check_names=True)

0 commit comments

Comments
 (0)