Description
Concatenating the results of connectorx.read_sql calls with pyarrow.concat_tables causes virtual memory usage to explode.
When the Arrow table is deep-copied first, the garbage collector can do its work and the memory is freed.
But the whole point of Arrow is zero-copy memory.
import copy

import connectorx as cx
import psutil
import pyarrow as pa


def main():
    conn = "mssql://user:my_super_secret_password@server:1433/db"
    query = "SELECT top 1 * FROM table"
    table = None
    for x in range(0, 10000):
        print(f"run #{x}")
        temp_table: pa.Table = cx.read_sql(conn, query, return_type="arrow")
        # When enabled, the virtual memory explosion doesn't occur.
        # temp_table = copy.deepcopy(temp_table)
        table = temp_table if table is None else pa.concat_tables([table, temp_table])
        print(psutil.Process().memory_info())


if __name__ == "__main__":
    main()
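
As a possible workaround, and not a confirmed fix on the connectorx side, forcing a real copy of each freshly read table before concatenating appears to let the underlying buffers be reclaimed, matching the commented-out deepcopy line above. A minimal sketch; read_sql_copied is a helper name introduced here for illustration only:

import copy

import connectorx as cx
import pyarrow as pa


def read_sql_copied(conn: str, query: str) -> pa.Table:
    # Read via connectorx, then immediately deep-copy the result so the
    # buffers allocated by the native reader can be released once the
    # temporary table goes out of scope (observed behaviour in this
    # reproduction, not a documented guarantee).
    temp_table: pa.Table = cx.read_sql(conn, query, return_type="arrow")
    return copy.deepcopy(temp_table)

In the loop above, table = pa.concat_tables([table, read_sql_copied(conn, query)]) keeps resident memory bounded in this reproduction, at the cost of one extra copy per read, which defeats the zero-copy goal Arrow is meant to provide.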