Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: BinaryAnalysisPlatform/bap-python
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 1.3.1
Choose a base ref
...
head repository: BinaryAnalysisPlatform/bap-python
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
  • 12 commits
  • 6 files changed
  • 5 contributors

Commits on Jun 4, 2018

  1. fixes parsing section and region statements

    Note: this is a backport from bap-1.3.1 package, as this fix is
    already there, but wasn't committed to the master branch of the
    upstream repository.
    
    The fix enables the special handling for the Section and Region types,
    which use hex number without the 0x prefix. Ideally, we shouldn't
    generate such input, but since historically this happened, we need to
    make our parser robust enough to be able to chew such representation
    also.
    ivg committed Jun 4, 2018
    Copy the full SHA
    c1b609f View commit details
  2. Merge pull request #9 from BinaryAnalysisPlatform/fix-section-region-…

    …parsing
    
    fixes parsing section and region statements
    ivg authored Jun 4, 2018

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    bfb8ba5 View commit details

Commits on Sep 8, 2019

  1. fix the arguments for Values

    zzrcxb committed Sep 8, 2019
    Copy the full SHA
    5c38a96 View commit details

Commits on Sep 9, 2019

  1. Copy the full SHA
    9c81149 View commit details
  2. turn off pylint warning

    zzrcxb committed Sep 9, 2019
    Copy the full SHA
    c13659f View commit details

Commits on Sep 10, 2019

  1. Merge pull request #11 from zzrcxb/master

    fix the arguments for Values
    ivg authored Sep 10, 2019

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    ec594dc View commit details

Commits on Nov 17, 2020

  1. Copy the full SHA
    ce86eb9 View commit details

Commits on Nov 18, 2020

  1. Merge pull request #16 from tnballo/master

    Update low-level RPC API for Python3
    ivg authored Nov 18, 2020

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    f9b5d7f View commit details

Commits on Jan 20, 2023

  1. Import collection ABCs from new path

    Importing the Iterable, Sequence, and Mapping ABCs directly from
    collections was deprecated in Python 3.3 and the aliases were removed in
    Python 3.10.
    
    Attempt to import from the new location, but if it fails because the
    current Python is older than 3.3, fall back to the old location.
    gmacon committed Jan 20, 2023
    Copy the full SHA
    a0dea53 View commit details

Commits on Jan 24, 2023

  1. Merge pull request #18 from gmacon/python-3.10-collections

    Support Python 3.10
    ivg authored Jan 24, 2023

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    0382823 View commit details

Commits on Dec 1, 2023

  1. Smol fixes (#15)

    * Fix a typo
    
    * Disable pylint false positive
    XVilka authored Dec 1, 2023

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    95e606d View commit details
  2. Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    ac0d9f7 View commit details
Showing with 75 additions and 29 deletions.
  1. +1 −1 README.md
  2. +4 −1 src/bap/adt.py
  3. +26 −4 src/bap/bir.py
  4. +19 −10 src/bap/noeval_parser.py
  5. +12 −13 src/bap/rpc.py
  6. +13 −0 tests/test_low_level_interface.py
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -59,7 +59,7 @@ Installation section).

```python
>>> import bap
>>> print '\n'.join(insn.asm for insn in bap.disasm("\x48\x83\xec\x08"))
>>> print '\n'.join(insn.asm for insn in bap.disasm(b"\x48\x83\xec\x08"))
decl %eax
subl $0x8, %esp
```
5 changes: 4 additions & 1 deletion src/bap/adt.py
Original file line number Diff line number Diff line change
@@ -182,7 +182,10 @@ def count_authors(library):
"""

from collections import Iterable,Sequence,Mapping
try:
from collections.abc import Iterable,Sequence,Mapping
except ImportError:
from collections import Iterable,Sequence,Mapping

class ADT(object):
"""Algebraic Data Type.
30 changes: 26 additions & 4 deletions src/bap/bir.py
Original file line number Diff line number Diff line change
@@ -2,7 +2,10 @@

"""BIR - BAP Intermediate Representation"""

from collections import Sequence,Mapping
try:
from collections.abc import Sequence,Mapping
except ImportError:
from collections import Sequence,Mapping
from .adt import *
from .bil import *
from . import noeval_parser
@@ -245,15 +248,26 @@ class Attrs(Map) :
class Attr(ADT) :
"""Attribute is a pair of attribute name and value,
both represented with str"""
pass

@property
def name(self):
"""name of attribute"""
return self.arg[0]

@property
def value(self):
"""value of attribute"""
return self.arg[1]

class Values(Map) :
"""A set of possible values, taken by a phi-node.
It is a mapping from the tid of a preceeding block,
to an expression that denotes a value.
"""
pass
def __init__(self, *args):
super(Map, self).__init__(args) # pylint: disable=bad-super-call
self.elements = dict(args[0])

class Tid(ADT) :
"""Tid(id,name=None) term unique identifier.
@@ -357,7 +371,15 @@ class Annotation(ADT) :
Each annotation denotes an association between a memory region and
some arbitrary property, denoted with an attribute.
"""
pass
@property
def region(self):
"""memory region"""
return self.arg[0]

@property
def attr(self):
"""memory region attribute"""
return self.arg[1]

def parse_addr(str):
return int(str.split(':')[0],16)
29 changes: 19 additions & 10 deletions src/bap/noeval_parser.py
Original file line number Diff line number Diff line change
@@ -2,23 +2,32 @@
'''
Parser for ADT string from bap that does not use eval
The nieve eval-based version runs into out-of-memory conditions on large files
The naive eval-based version runs into out-of-memory conditions on large files
'''
import gc
import sys
import time

# NOTE: uses bap.bir, but cannot import at module level (circular references)
from subprocess import check_output

# bap.1.3 breaks the format of the following types. It prints hexes
# without prefixing them with the `0x` escape. To fix it without
# fixing bap, we will treat integers inside these parent types as
# hexadecimals if there is no prefix.
BROKEN_TYPES = [
'Section',
'Region'
]

# NOTE: uses bap.bir, but cannot import at module level (circular references)

def toint(string, start, end):
def toint(string, start, end, base=10):
'''
Convert substring string[start:end] to integer/long without eval
Note: may contain leading whitespace
'''
istr = string[start:end].lstrip()

if sys.version_info > (3,): # then longs don't exist
if istr.endswith('L'):
istr = istr.rstrip('L')
@@ -31,7 +40,7 @@ def toint(string, start, end):
if istr.startswith('0x'):
return of_str(istr, 16)
else:
return of_str(istr)
return of_str(istr, base)

def setup_progress(totalitems):
'''
@@ -159,17 +168,19 @@ def _parse_end(in_c, in_s, i, objs, stk):
raise ParserInputError('Mismatched input stream')
j = stk[-1]
parent = objs[j]
ptyp = parent['typ']
assert isinstance(parent, dict)
assert parent, 'parent is empty'
assert parent['typ'] != 'int', 'parent wrong type: %r' % (parent['typ'])
assert ptyp != 'int', 'parent wrong type: %r' % (parent['typ'])
assert 'children' in parent
if top: # add to parent if non empty
# make real int before appending
if top['typ'] == 'd': # int
try:
top = toint(in_s, k, i)
base = 16 if ptyp in BROKEN_TYPES else 10
top = toint(in_s, k, i, base)
except ValueError:
raise ParserInputError("Integer expected between [%d..%d)" % (top, i))
raise ParserInputError("Integer expected between [%d..%d)" % (k, i))
parent['children'].append(top)
if in_c == ',': # add blank object and move on
# next obj
@@ -179,7 +190,6 @@ def _parse_end(in_c, in_s, i, objs, stk):
return i
else: # we are ending a tuple/list/app do it
# maybe handle apply (num and seq are earlier)
ptyp = parent['typ']
if ptyp == '[':
if in_c != ']':
raise ParserInputError('close %r and open %r mismatch' % (in_c, ptyp))
@@ -325,4 +335,3 @@ def parser(input_str, disable_gc=False, logger=None):
'format': 'adt',
'load': parser
}

25 changes: 12 additions & 13 deletions src/bap/rpc.py
Original file line number Diff line number Diff line change
@@ -112,11 +112,11 @@ def load(self):
if self.msg is None:
self.msg = self.bap.get_resource(self.ident)
if not self._name in self.msg:
if 'error' in msg:
if 'error' in self.msg:
raise ServerError(response)
else:
msg = "Expected {0} msg but got {1}".format(
self._name, msg)
self._name, self.msg)
raise RuntimeError(msg)

def get(self, child):
@@ -126,7 +126,7 @@ def get(self, child):

class Project(Resource):
def __init__(self, ident, bap):
super(Image,self).__init__('program', ident, bap)
super(Image,self).__init__('program', ident, bap) # pylint: disable=bad-super-call

def load_program(self):
self.program = bir.loads(self.get('program'))
@@ -171,7 +171,7 @@ def load_symbols(self):

def get_symbol(self, name, d=None):
try:
return (s for s in self.symbols if s.name == name).next()
return next(s for s in self.symbols if s.name == name)
except StopIteration:
return d

@@ -214,8 +214,8 @@ def __init__(self, mem, parent):

def load_data(self):
try:
url = (urlparse(url) for url in self.links
if urlparse(url).scheme == 'mmap').next()
url = next(urlparse(url) for url in self.links
if urlparse(url).scheme == 'mmap')
qs = parse_qs(url.query)
offset = int(qs['offset'][0])
with open(url.path, "rw+b") as f:
@@ -266,8 +266,8 @@ def __init__(self, server={}):
self.last_id = 0
for attempt in range(RETRIES):
try:
self.capabilities = self.call({'init' : {
'version' : '0.1'}}).next()['capabilities']
self.capabilities = next(self.call({'init' : {
'version' : '0.1'}}))['capabilities']
break
except Exception:
if attempt + 1 == RETRIES:
@@ -278,7 +278,7 @@ def __init__(self, server={}):
if not "capabilities" in self.__dict__:
raise RuntimeError("Failed to connect to BAP server")
self.data = {}
self.temp = NamedTemporaryFile('rw+b', prefix="bap-")
self.temp = NamedTemporaryFile('w+b', prefix="bap-")

def insns(self, src, **kwargs):
req = {'resource' : src}
@@ -300,7 +300,7 @@ def load_file(self, name):
'url' : 'file://' + name}})

def get_resource(self, name):
return self.call({'get_resource' : name}).next()
return next(self.call({'get_resource' : name}))

def load_chunk(self, data, **kwargs):
kwargs.setdefault('url', self.mmap(data))
@@ -341,14 +341,13 @@ def mmap(self, data):
return url

def _load_resource(self, res):
rep = self.call(res).next()
rep = next(self.call(res))
if 'error' in rep:
raise ServerError(rep)
return Id(rep['resource'])


def jsons(r, p=0):
dec = json.JSONDecoder(encoding='utf-8')
dec = json.JSONDecoder()
while True:
obj,p = dec.scan_once(r.text,p)
yield obj
13 changes: 13 additions & 0 deletions tests/test_low_level_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import unittest
import bap

class TestLowLevelInterface(unittest.TestCase):

def test_low_level_interface(self):
asm_str = '\n'.join(insn.asm for insn in bap.disasm(b"\x48\x83\xec\x08"))
self.assertIsNotNone(asm_str)
self.assertIn("\tdecl\t%eax", asm_str)
self.assertIn("\tsubl\t$0x8, %esp", asm_str)

if __name__ == "__main__":
unittest.main()