diff --git a/README.md b/README.md
index 15f9f9b..2a211cc 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,15 @@ part of your geospatial project.
 
 # Version Changes
 
+## 2.3.x
+
+### New Features:
+- Reader.iterRecords now allows start and stop to be specified, to look up smaller ranges of records.
+- Equality comparisons (`==` and `!=`) between Records now also require the fields to be the same (and in the same order).
+
+### Development:
+- Code quality tools run on PyShp
+
 ## 2.3.1
 
 ### Bug fixes:
@@ -1467,6 +1476,7 @@ Karim Bahgat
 karanrn
 Kurt Schwehr
 Kyle Kelley
+Lionel Guez
 Louis Tiao
 Marcin Cuprjak
 mcuprjak
diff --git a/shapefile.py b/shapefile.py
index 12af74d..2b22ba9 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -854,6 +854,23 @@ def __dir__(self):
         )  # plus field names (random order if Python version < 3.6)
         return default + fnames
 
+    def __eq__(self, other):
+        """Records are equal only if both their fields (names and
+        positions) and their values are the same. A Record compared to
+        anything else (e.g. a plain list) is compared by values only."""
+        if isinstance(other, self.__class__):
+            if self.__field_positions != other.__field_positions:
+                return False
+        return list.__eq__(self, other)
+
+    def __ne__(self, other):
+        # list defines its own __ne__, which would otherwise be inherited
+        # and ignore the fields, so that a != b could disagree with a == b.
+        result = self.__eq__(other)
+        if result is NotImplemented:
+            return result
+        return not result
+
 
 class ShapeRecord(object):
     """A ShapeRecord object containing a shape along with its attributes.
@@ -1325,7 +1342,9 @@ def __restrictIndex(self, i):
         if self.numRecords:
             rmax = self.numRecords - 1
             if abs(i) > rmax:
-                raise IndexError("Shape or Record index out of range.")
+                raise IndexError(
+                    "Shape or Record index: %s out of range. Max index: %s" % (i, rmax)
+                )
             if i < 0:
                 i = range(self.numRecords)[i]
         return i
@@ -1809,18 +1828,36 @@ def records(self, fields=None):
                 records.append(r)
         return records
 
-    def iterRecords(self, fields=None):
+    def iterRecords(self, fields=None, start=0, stop=None):
         """Returns a generator of records in a dbf file.
         Useful for large shapefiles or dbf files.
         To only read some of the fields, specify the 'fields' arg as a
         list of one or more fieldnames.
+        By default yields all records. Otherwise, specify start
+        (default: 0) and/or stop (default: number_of_records)
+        to only yield the records numbered i, where
+        start <= i < stop (or
+        start <= i < number_of_records + stop
+        if stop < 0). A negative start also counts back from the end.
+        Raises IndexError if start or stop is out of range.
         """
         if self.numRecords is None:
             self.__dbfHeader()
         f = self.__getFileObj(self.dbf)
-        f.seek(self.__dbfHdrLength)
+        start = self.__restrictIndex(start)
+        if stop is None:
+            stop = self.numRecords
+        elif abs(stop) > self.numRecords:
+            raise IndexError(
+                "abs(stop): %s exceeds number of records: %s."
+                % (abs(stop), self.numRecords)
+            )
+        elif stop < 0:
+            stop += self.numRecords  # count back from the end, no list needed
+        recSize = self.__recordLength
+        f.seek(self.__dbfHdrLength + (start * recSize))
         fieldTuples, recLookup, recStruct = self.__recordFields(fields)
-        for i in xrange(self.numRecords):
+        for i in xrange(start, stop):
             r = self.__record(
                 oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct
             )
diff --git a/test_shapefile.py b/test_shapefile.py
index 08561c6..7984e91 100644
--- a/test_shapefile.py
+++ b/test_shapefile.py
@@ -972,6 +972,51 @@ def test_record_oid():
             assert shaperec.record.oid == i
 
 
+def test_iterRecords_start_stop():
+    """
+    Assert that Reader.iterRecords(start, stop)
+    returns the correct records, as if searched for
+    by index with Reader.record
+    """
+
+    with shapefile.Reader("shapefiles/blockgroups") as sf:
+        N = len(sf)
+
+        # Arbitrary selection of record indices
+        # (there are 663 records in blockgroups.dbf).
+        for i in [
+            0,
+            1,
+            2,
+            3,
+            5,
+            11,
+            17,
+            33,
+            51,
+            103,
+            170,
+            234,
+            435,
+            543,
+            N - 3,
+            N - 2,
+            N - 1,
+        ]:
+            for record in sf.iterRecords(start=i):
+                assert record == sf.record(record.oid)
+
+            for record in sf.iterRecords(stop=i):
+                assert record == sf.record(record.oid)
+
+            for stop in range(i, N):
+                # test positive values of stop, as well as
+                # negative indexing from the end
+                for stop_arg in (stop, stop - N):
+                    for record in sf.iterRecords(start=i, stop=stop_arg):
+                        assert record == sf.record(record.oid)
+
+
 def test_shape_oid():
     """
     Assert that the shape's oid attribute returns