Skip to content

Make subdirs in tests/io/data #29513

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 13, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/plotting/_misc.py
Original file line number Diff line number Diff line change
@@ -364,7 +364,7 @@ def parallel_coordinates(
--------
>>> from matplotlib import pyplot as plt
>>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
'/pandas/tests/data/iris.csv')
'/pandas/tests/data/csv/iris.csv')
>>> pd.plotting.parallel_coordinates(
df, 'Name',
color=('#556270', '#4ECDC4', '#C7F464'))
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 3 additions & 2 deletions pandas/tests/io/excel/conftest.py
Original file line number Diff line number Diff line change
@@ -24,11 +24,12 @@ def merge_cells(request):


@pytest.fixture
def df_ref():
def df_ref(datapath):
"""
Obtain the reference data from read_csv with the Python engine.
"""
df_ref = read_csv("test1.csv", index_col=0, parse_dates=True, engine="python")
filepath = datapath("io", "data", "csv", "test1.csv")
df_ref = read_csv(filepath, index_col=0, parse_dates=True, engine="python")
return df_ref


2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_odf.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
def cd_and_set_engine(monkeypatch, datapath):
func = functools.partial(pd.read_excel, engine="odf")
monkeypatch.setattr(pd, "read_excel", func)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.chdir(datapath("io", "data", "excel"))


def test_read_invalid_types_raises():
12 changes: 7 additions & 5 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
@@ -81,7 +81,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext):
pytest.skip()

func = partial(pd.read_excel, engine=engine)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.chdir(datapath("io", "data", "excel"))
monkeypatch.setattr(pd, "read_excel", func)

def test_usecols_int(self, read_ext, df_ref):
@@ -502,9 +502,11 @@ def test_read_from_http_url(self, read_ext):
if read_ext == ".ods": # TODO: remove once on master
pytest.skip()

# TODO: alimcmaster1 - revert to master
url = (
"https://raw.github.com/pandas-dev/pandas/master/"
"pandas/tests/io/data/test1" + read_ext
"https://raw.githubusercontent.com/alimcmaster1"
"/pandas/mcmali-tests-dir-struct/"
"pandas/tests/io/data/excel/test1" + read_ext
)
url_table = pd.read_excel(url)
local_table = pd.read_excel("test1" + read_ext)
@@ -527,7 +529,7 @@ def test_read_from_s3_url(self, read_ext, s3_resource):
def test_read_from_file_url(self, read_ext, datapath):

# FILE
localtable = os.path.join(datapath("io", "data"), "test1" + read_ext)
localtable = os.path.join(datapath("io", "data", "excel"), "test1" + read_ext)
local_table = pd.read_excel(localtable)

try:
@@ -828,7 +830,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext):
pytest.skip()

func = partial(pd.ExcelFile, engine=engine)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.chdir(datapath("io", "data", "excel"))
monkeypatch.setattr(pd, "ExcelFile", func)

def test_excel_passes_na(self, read_ext):
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
@@ -35,7 +35,7 @@ def test_read_xlrd_book(read_ext, frame):

# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
path = datapath("io", "data", "test1{}".format(read_ext))
path = datapath("io", "data", "excel", "test1{}".format(read_ext))
with pd.ExcelFile(path) as excel:
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, "asdf")
28 changes: 20 additions & 8 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
@@ -208,21 +208,33 @@ def test_read_expands_user_home_dir(
@pytest.mark.parametrize(
"reader, module, path",
[
(pd.read_csv, "os", ("io", "data", "iris.csv")),
(pd.read_table, "os", ("io", "data", "iris.csv")),
(pd.read_fwf, "os", ("io", "data", "fixed_width_format.txt")),
(pd.read_excel, "xlrd", ("io", "data", "test1.xlsx")),
(pd.read_feather, "feather", ("io", "data", "feather-0_3_1.feather")),
(pd.read_csv, "os", ("data", "iris.csv")),
(pd.read_table, "os", ("data", "iris.csv")),
(
pd.read_fwf,
"os",
("io", "data", "fixed_width", "fixed_width_format.txt"),
),
(pd.read_excel, "xlrd", ("io", "data", "excel", "test1.xlsx")),
(
pd.read_feather,
"feather",
("io", "data", "feather", "feather-0_3_1.feather"),
),
(
pd.read_hdf,
"tables",
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
),
(pd.read_stata, "os", ("io", "data", "stata10_115.dta")),
(pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
(pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),
(pd.read_msgpack, "os", ("io", "msgpack", "data", "frame.mp")),
(pd.read_pickle, "os", ("io", "data", "categorical.0.25.0.pickle")),
(
pd.read_pickle,
"os",
("io", "data", "pickle", "categorical.0.25.0.pickle"),
),
],
)
def test_read_fspath_all(self, reader, module, path, datapath):
@@ -296,7 +308,7 @@ def test_write_fspath_hdf5(self):

@pytest.fixture
def mmap_file(datapath):
return datapath("io", "data", "test_mmap.csv")
return datapath("io", "data", "csv", "test_mmap.csv")


class TestMMapWrapper:
34 changes: 18 additions & 16 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
@@ -63,7 +63,7 @@ def test_bs4_version_fails(monkeypatch, datapath):

monkeypatch.setattr(bs4, "__version__", "4.2")
with pytest.raises(ImportError, match="Pandas requires version"):
read_html(datapath("io", "data", "spam.html"), flavor="bs4")
read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4")


def test_invalid_flavor():
@@ -78,7 +78,7 @@ def test_invalid_flavor():
@td.skip_if_no("bs4")
@td.skip_if_no("lxml")
def test_same_ordering(datapath):
filename = datapath("io", "data", "valid_markup.html")
filename = datapath("io", "data", "html", "valid_markup.html")
dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"])
dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"])
assert_framelist_equal(dfs_lxml, dfs_bs4)
@@ -95,10 +95,10 @@ def test_same_ordering(datapath):
class TestReadHtml:
@pytest.fixture(autouse=True)
def set_files(self, datapath):
self.spam_data = datapath("io", "data", "spam.html")
self.spam_data = datapath("io", "data", "html", "spam.html")
self.spam_data_kwargs = {}
self.spam_data_kwargs["encoding"] = "UTF-8"
self.banklist_data = datapath("io", "data", "banklist.html")
self.banklist_data = datapath("io", "data", "html", "banklist.html")

@pytest.fixture(autouse=True, scope="function")
def set_defaults(self, flavor, request):
@@ -133,9 +133,11 @@ def test_banklist_url(self):

@tm.network
def test_spam_url(self):
# TODO: alimcmaster1 - revert to master
url = (
"https://raw.githubusercontent.com/pandas-dev/pandas/master/"
"pandas/tests/io/data/spam.html"
"https://raw.githubusercontent.com/alimcmaster1/"
"pandas/mcmali-tests-dir-struct/"
"pandas/tests/io/data/html/spam.html"
)
df1 = self.read_html(url, ".*Water.*")
df2 = self.read_html(url, "Unit")
@@ -376,7 +378,7 @@ def test_python_docs_table(self):
@pytest.mark.slow
def test_thousands_macau_stats(self, datapath):
all_non_nan_table_index = -2
macau_data = datapath("io", "data", "macau.html")
macau_data = datapath("io", "data", "html", "macau.html")
dfs = self.read_html(macau_data, index_col=0, attrs={"class": "style1"})
df = dfs[all_non_nan_table_index]

@@ -385,7 +387,7 @@ def test_thousands_macau_stats(self, datapath):
@pytest.mark.slow
def test_thousands_macau_index_col(self, datapath):
all_non_nan_table_index = -2
macau_data = datapath("io", "data", "macau.html")
macau_data = datapath("io", "data", "html", "macau.html")
dfs = self.read_html(macau_data, index_col=0, header=0)
df = dfs[all_non_nan_table_index]

@@ -566,7 +568,7 @@ def test_parse_header_of_non_string_column(self):
tm.assert_frame_equal(result, expected)

def test_nyse_wsj_commas_table(self, datapath):
data = datapath("io", "data", "nyse_wsj.html")
data = datapath("io", "data", "html", "nyse_wsj.html")
df = self.read_html(data, index_col=0, header=0, attrs={"class": "mdcTable"})[0]

expected = Index(
@@ -594,7 +596,7 @@ def try_remove_ws(x):

df = self.read_html(self.banklist_data, "Metcalf", attrs={"id": "table"})[0]
ground_truth = read_csv(
datapath("io", "data", "banklist.csv"),
datapath("io", "data", "csv", "banklist.csv"),
converters={"Updated Date": Timestamp, "Closing Date": Timestamp},
)
assert df.shape == ground_truth.shape
@@ -889,19 +891,19 @@ def test_parse_dates_combine(self):
tm.assert_frame_equal(newdf, res[0])

def test_computer_sales_page(self, datapath):
data = datapath("io", "data", "computer_sales_page.html")
data = datapath("io", "data", "html", "computer_sales_page.html")
msg = (
r"Passed header=\[0,1\] are too many "
r"rows for this multi_index of columns"
)
with pytest.raises(ParserError, match=msg):
self.read_html(data, header=[0, 1])

data = datapath("io", "data", "computer_sales_page.html")
data = datapath("io", "data", "html", "computer_sales_page.html")
assert self.read_html(data, header=[1, 2])

def test_wikipedia_states_table(self, datapath):
data = datapath("io", "data", "wikipedia_states.html")
data = datapath("io", "data", "html", "wikipedia_states.html")
assert os.path.isfile(data), "{data!r} is not a file".format(data=data)
assert os.path.getsize(data), "{data!r} is an empty file".format(data=data)
result = self.read_html(data, "Arizona", header=1)[0]
@@ -1095,14 +1097,14 @@ def test_multiple_header_rows(self):
tm.assert_frame_equal(expected_df, html_df)

def test_works_on_valid_markup(self, datapath):
filename = datapath("io", "data", "valid_markup.html")
filename = datapath("io", "data", "html", "valid_markup.html")
dfs = self.read_html(filename, index_col=0)
assert isinstance(dfs, list)
assert isinstance(dfs[0], DataFrame)

@pytest.mark.slow
def test_fallback_success(self, datapath):
banklist_data = datapath("io", "data", "banklist.html")
banklist_data = datapath("io", "data", "html", "banklist.html")
self.read_html(banklist_data, ".*Water.*", flavor=["lxml", "html5lib"])

def test_to_html_timestamp(self):
@@ -1240,7 +1242,7 @@ def run(self):
# force import check by reinitialising global vars in html.py
reload(pandas.io.html)

filename = datapath("io", "data", "valid_markup.html")
filename = datapath("io", "data", "html", "valid_markup.html")
helper_thread1 = ErrorThread(target=self.read_html, args=(filename,))
helper_thread2 = ErrorThread(target=self.read_html, args=(filename,))

12 changes: 7 additions & 5 deletions pandas/tests/io/test_pickle.py
Original file line number Diff line number Diff line change
@@ -202,23 +202,25 @@ def test_legacy_sparse_warning(datapath):
Generated with

>>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]}).to_sparse()
>>> df.to_pickle("pandas/tests/io/data/sparseframe-0.20.3.pickle.gz",
>>> df.to_pickle("pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz",
... compression="gzip")

>>> s = df['B']
>>> s.to_pickle("pandas/tests/io/data/sparseseries-0.20.3.pickle.gz",
>>> s.to_pickle("pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz",
... compression="gzip")
"""
with tm.assert_produces_warning(FutureWarning):
simplefilter("ignore", DeprecationWarning) # from boto
pd.read_pickle(
datapath("io", "data", "sparseseries-0.20.3.pickle.gz"), compression="gzip"
datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"),
compression="gzip",
)

with tm.assert_produces_warning(FutureWarning):
simplefilter("ignore", DeprecationWarning) # from boto
pd.read_pickle(
datapath("io", "data", "sparseframe-0.20.3.pickle.gz"), compression="gzip"
datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"),
compression="gzip",
)


@@ -382,7 +384,7 @@ def test_read(self, protocol, get_random_path):
def test_unicode_decode_error():
# pickle file written with py27, should be readable without raising
# UnicodeDecodeError, see GH#28645
path = os.path.join(os.path.dirname(__file__), "data", "test_py27.pkl")
path = os.path.join(os.path.dirname(__file__), "data", "pickle", "test_py27.pkl")
df = pd.read_pickle(path)

# just test the columns are correct since the values are random
10 changes: 5 additions & 5 deletions pandas/tests/io/test_spss.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@

def test_spss_labelled_num(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "labelled-num.sav")
fname = datapath("io", "data", "spss", "labelled-num.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame({"VAR00002": "This is one"}, index=[0])
@@ -23,7 +23,7 @@ def test_spss_labelled_num(datapath):

def test_spss_labelled_num_na(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "labelled-num-na.sav")
fname = datapath("io", "data", "spss", "labelled-num-na.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame({"VAR00002": ["This is one", None]})
@@ -37,7 +37,7 @@ def test_spss_labelled_num_na(datapath):

def test_spss_labelled_str(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "labelled-str.sav")
fname = datapath("io", "data", "spss", "labelled-str.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame({"gender": ["Male", "Female"]})
@@ -51,7 +51,7 @@ def test_spss_labelled_str(datapath):

def test_spss_umlauts(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "umlauts.sav")
fname = datapath("io", "data", "spss", "umlauts.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame(
@@ -67,7 +67,7 @@ def test_spss_umlauts(datapath):

def test_spss_usecols(datapath):
# usecols must be list-like
fname = datapath("io", "data", "labelled-num.sav")
fname = datapath("io", "data", "spss", "labelled-num.sav")

with pytest.raises(TypeError, match="usecols must be list-like."):
pd.read_spss(fname, usecols="VAR00002")
2 changes: 1 addition & 1 deletion pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
@@ -275,7 +275,7 @@ def _get_exec(self):
else:
return self.conn.cursor()

@pytest.fixture(params=[("io", "data", "iris.csv")])
@pytest.fixture(params=[("data", "iris.csv")])
def load_iris_data(self, datapath, request):
import io

4 changes: 2 additions & 2 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@

@pytest.fixture
def dirpath(datapath):
return datapath("io", "data")
return datapath("io", "data", "stata")


@pytest.fixture
@@ -42,7 +42,7 @@ def parsed_114(dirpath):
class TestStata:
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath("io", "data")
self.dirpath = datapath("io", "data", "stata")
self.dta1_114 = os.path.join(self.dirpath, "stata1_114.dta")
self.dta1_117 = os.path.join(self.dirpath, "stata1_117.dta")