Skip to content

Commit c4bb4f9

Browse files
BSWANG authored and aboch committed
rdma: support rdma metrics: resource and statistic
Signed-off-by: bingshen.wbs <[email protected]>
1 parent e9f11f7 commit c4bb4f9

File tree

3 files changed

+276
-24
lines changed

3 files changed

+276
-24
lines changed

nl/rdma_link_linux.go

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,41 @@ const (
99
)
1010

1111
// Command identifiers passed to getProtoField together with RDMA_NL_NLDEV when
// building an rdma netlink request.
// NOTE(review): the values presumably mirror the kernel's rdma nldev command
// enum — confirm against the uapi header.
// This is a diff view: lines marked "-" are the pre-commit text and lines
// marked "+" the post-commit text, which is why every entry appears twice.
const (
12-
RDMA_NLDEV_CMD_GET = 1
13-
RDMA_NLDEV_CMD_SET = 2
14-
RDMA_NLDEV_CMD_NEWLINK = 3
15-
RDMA_NLDEV_CMD_DELLINK = 4
16-
RDMA_NLDEV_CMD_SYS_GET = 6
17-
RDMA_NLDEV_CMD_SYS_SET = 7
12+
RDMA_NLDEV_CMD_GET = 1
13+
RDMA_NLDEV_CMD_SET = 2
14+
RDMA_NLDEV_CMD_NEWLINK = 3
15+
RDMA_NLDEV_CMD_DELLINK = 4
16+
RDMA_NLDEV_CMD_SYS_GET = 6
17+
RDMA_NLDEV_CMD_SYS_SET = 7
18+
// Added by this commit: used by RdmaResourceList to dump resource summaries.
RDMA_NLDEV_CMD_RES_GET = 9
19+
// Added by this commit: used by RdmaPortStatisticList to fetch port counters.
RDMA_NLDEV_CMD_STAT_GET = 17
1820
)
1921

2022
// Attribute type identifiers used when building and parsing rdma nldev netlink
// messages.
// NOTE(review): the values presumably mirror the kernel's rdma nldev attribute
// enum — confirm against the uapi header.
// This is a diff view: lines marked "-" are the pre-commit text and lines
// marked "+" the post-commit text, which is why most entries appear twice.
const (
21-
RDMA_NLDEV_ATTR_DEV_INDEX = 1
22-
RDMA_NLDEV_ATTR_DEV_NAME = 2
23-
RDMA_NLDEV_ATTR_PORT_INDEX = 3
24-
RDMA_NLDEV_ATTR_CAP_FLAGS = 4
25-
RDMA_NLDEV_ATTR_FW_VERSION = 5
26-
RDMA_NLDEV_ATTR_NODE_GUID = 6
27-
RDMA_NLDEV_ATTR_SYS_IMAGE_GUID = 7
28-
RDMA_NLDEV_ATTR_SUBNET_PREFIX = 8
29-
RDMA_NLDEV_ATTR_LID = 9
30-
RDMA_NLDEV_ATTR_SM_LID = 10
31-
RDMA_NLDEV_ATTR_LMC = 11
32-
RDMA_NLDEV_ATTR_PORT_STATE = 12
33-
RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13
34-
RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14
35-
RDMA_NLDEV_ATTR_NDEV_NAME = 51
36-
RDMA_NLDEV_ATTR_LINK_TYPE = 65
37-
RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66
38-
RDMA_NLDEV_NET_NS_FD = 68
23+
RDMA_NLDEV_ATTR_DEV_INDEX = 1
24+
RDMA_NLDEV_ATTR_DEV_NAME = 2
25+
RDMA_NLDEV_ATTR_PORT_INDEX = 3
26+
RDMA_NLDEV_ATTR_CAP_FLAGS = 4
27+
RDMA_NLDEV_ATTR_FW_VERSION = 5
28+
RDMA_NLDEV_ATTR_NODE_GUID = 6
29+
RDMA_NLDEV_ATTR_SYS_IMAGE_GUID = 7
30+
RDMA_NLDEV_ATTR_SUBNET_PREFIX = 8
31+
RDMA_NLDEV_ATTR_LID = 9
32+
RDMA_NLDEV_ATTR_SM_LID = 10
33+
RDMA_NLDEV_ATTR_LMC = 11
34+
RDMA_NLDEV_ATTR_PORT_STATE = 12
35+
RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13
36+
RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14
37+
// Added by this commit: resource summary attributes. RES_SUMMARY is handed to
// parseRdmaCounters, which expects RES_SUMMARY_ENTRY items each holding an
// ENTRY_NAME followed by an ENTRY_CURR child.
RDMA_NLDEV_ATTR_RES_SUMMARY = 15
38+
RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY = 16
39+
RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME = 17
40+
RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR = 18
41+
RDMA_NLDEV_ATTR_NDEV_NAME = 51
42+
RDMA_NLDEV_ATTR_LINK_TYPE = 65
43+
RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66
44+
RDMA_NLDEV_NET_NS_FD = 68
45+
// Added by this commit: hardware counter attributes. STAT_HWCOUNTERS is handed
// to parseRdmaCounters, which expects STAT_HWCOUNTER_ENTRY items each holding
// an ENTRY_NAME followed by an ENTRY_VALUE child.
RDMA_NLDEV_ATTR_STAT_HWCOUNTERS = 80
46+
RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY = 81
47+
RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME = 82
48+
RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE = 83
3949
)

rdma_link_linux.go

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type RdmaLinkAttrs struct {
1818
FirmwareVersion string
1919
NodeGuid string
2020
SysImageGuid string
21+
NumPorts uint32
2122
}
2223

2324
// Link represents a rdma device from netlink.
@@ -69,6 +70,11 @@ func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) {
6970
r := bytes.NewReader(value)
7071
binary.Read(r, nl.NativeEndian(), &sysGuid)
7172
link.Attrs.SysImageGuid = uint64ToGuidString(sysGuid)
73+
case nl.RDMA_NLDEV_ATTR_PORT_INDEX:
74+
var availablePort uint32
75+
r := bytes.NewReader(value)
76+
binary.Read(r, nl.NativeEndian(), &availablePort)
77+
link.Attrs.NumPorts = availablePort
7278
}
7379
if (len % 4) != 0 {
7480
// Skip pad bytes
@@ -345,3 +351,212 @@ func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) er
345351
_, err := req.Execute(unix.NETLINK_RDMA, 0)
346352
return err
347353
}
354+
355+
// RdmaResource is the resource tracking summary reported for one rdma device.
type RdmaResource struct {
	// Index is the device index taken from the RDMA_NLDEV_ATTR_DEV_INDEX attribute.
	Index uint32
	// Name is the device name taken from the RDMA_NLDEV_ATTR_DEV_NAME attribute.
	Name string
	// RdmaResourceSummaryEntries maps each summary entry name to its current
	// value, as decoded from the RDMA_NLDEV_ATTR_RES_SUMMARY attribute.
	RdmaResourceSummaryEntries map[string]uint64
}
361+
362+
// RdmaResourceList list rdma resource tracking information
363+
// Returns all rdma devices resource tracking summary on success or returns error
364+
// otherwise.
365+
// Equivalent to: `rdma resource'
366+
func RdmaResourceList() ([]*RdmaResource, error) {
367+
return pkgHandle.RdmaResourceList()
368+
}
369+
370+
// RdmaResourceList list rdma resource tracking information
371+
// Returns all rdma devices resource tracking summary on success or returns error
372+
// otherwise.
373+
// Equivalent to: `rdma resource'
374+
func (h *Handle) RdmaResourceList() ([]*RdmaResource, error) {
375+
proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_RES_GET)
376+
req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP)
377+
378+
msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
379+
if err != nil {
380+
return nil, err
381+
}
382+
if len(msgs) == 0 {
383+
return nil, fmt.Errorf("No valid response from kernel")
384+
}
385+
var rdmaResources []*RdmaResource
386+
for _, msg := range msgs {
387+
res, err := executeOneGetRdmaResourceList(msg)
388+
if err != nil {
389+
return nil, err
390+
}
391+
rdmaResources = append(rdmaResources, res)
392+
}
393+
return rdmaResources, nil
394+
}
395+
396+
func parseRdmaCounters(counterType uint16, data []byte) (map[string]uint64, error) {
397+
var counterKeyType, counterValueType uint16
398+
switch counterType {
399+
case nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY:
400+
counterKeyType = nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME
401+
counterValueType = nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR
402+
case nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY:
403+
counterKeyType = nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME
404+
counterValueType = nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE
405+
default:
406+
return nil, fmt.Errorf("Invalid counter type: %d", counterType)
407+
}
408+
counters := make(map[string]uint64)
409+
reader := bytes.NewReader(data)
410+
411+
for reader.Len() >= 4 {
412+
_, attrType, _, value := parseNfAttrTLV(reader)
413+
if attrType != counterType {
414+
return nil, fmt.Errorf("Invalid resource summary entry type; %d", attrType)
415+
}
416+
417+
summaryReader := bytes.NewReader(value)
418+
for summaryReader.Len() >= 4 {
419+
_, attrType, len, value := parseNfAttrTLV(summaryReader)
420+
if attrType != counterKeyType {
421+
return nil, fmt.Errorf("Invalid resource summary entry name type; %d", attrType)
422+
}
423+
name := string(value[0 : len-1])
424+
// Skip pad bytes
425+
if (len % 4) != 0 {
426+
summaryReader.Seek(int64(4-(len%4)), seekCurrent)
427+
}
428+
_, attrType, len, value = parseNfAttrTLV(summaryReader)
429+
if attrType != counterValueType {
430+
return nil, fmt.Errorf("Invalid resource summary entry value type; %d", attrType)
431+
}
432+
counters[name] = native.Uint64(value)
433+
}
434+
}
435+
return counters, nil
436+
}
437+
438+
func executeOneGetRdmaResourceList(data []byte) (*RdmaResource, error) {
439+
var res RdmaResource
440+
reader := bytes.NewReader(data)
441+
for reader.Len() >= 4 {
442+
_, attrType, len, value := parseNfAttrTLV(reader)
443+
444+
switch attrType {
445+
case nl.RDMA_NLDEV_ATTR_DEV_INDEX:
446+
var Index uint32
447+
r := bytes.NewReader(value)
448+
binary.Read(r, nl.NativeEndian(), &Index)
449+
res.Index = Index
450+
case nl.RDMA_NLDEV_ATTR_DEV_NAME:
451+
res.Name = string(value[0 : len-1])
452+
case nl.RDMA_NLDEV_ATTR_RES_SUMMARY:
453+
var err error
454+
res.RdmaResourceSummaryEntries, err = parseRdmaCounters(nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY, value)
455+
if err != nil {
456+
return nil, err
457+
}
458+
}
459+
if (len % 4) != 0 {
460+
// Skip pad bytes
461+
reader.Seek(int64(4-(len%4)), seekCurrent)
462+
}
463+
}
464+
return &res, nil
465+
}
466+
467+
// RdmaPortStatistic holds the statistic counters reported for one port of an
// rdma device.
type RdmaPortStatistic struct {
	// PortIndex is the port number taken from the RDMA_NLDEV_ATTR_PORT_INDEX
	// attribute of the reply.
	PortIndex uint32
	// Statistics maps each hardware counter name to its value, as decoded
	// from the RDMA_NLDEV_ATTR_STAT_HWCOUNTERS attribute.
	Statistics map[string]uint64
}
472+
473+
// RdmaDeviceStatistic represents a rdma device statistic counter
474+
type RdmaDeviceStatistic struct {
475+
RdmaPortStatistics []*RdmaPortStatistic
476+
}
477+
478+
// RdmaStatistic get rdma device statistic counters
479+
// Returns rdma device statistic counters on success or returns error
480+
// otherwise.
481+
// Equivalent to: `rdma statistic show link [DEV]'
482+
func RdmaStatistic(link *RdmaLink) (*RdmaDeviceStatistic, error) {
483+
return pkgHandle.RdmaStatistic(link)
484+
}
485+
486+
// RdmaStatistic get rdma device statistic counters
487+
// Returns rdma device statistic counters on success or returns error
488+
// otherwise.
489+
// Equivalent to: `rdma statistic show link [DEV]'
490+
func (h *Handle) RdmaStatistic(link *RdmaLink) (*RdmaDeviceStatistic, error) {
491+
rdmaLinkStatistic := make([]*RdmaPortStatistic, 0)
492+
for portIndex := uint32(1); portIndex <= link.Attrs.NumPorts; portIndex++ {
493+
portStatistic, err := h.RdmaPortStatisticList(link, portIndex)
494+
if err != nil {
495+
return nil, err
496+
}
497+
rdmaLinkStatistic = append(rdmaLinkStatistic, portStatistic)
498+
}
499+
return &RdmaDeviceStatistic{RdmaPortStatistics: rdmaLinkStatistic}, nil
500+
}
501+
502+
// RdmaPortStatisticList get rdma device port statistic counters
503+
// Returns rdma device port statistic counters on success or returns error
504+
// otherwise.
505+
// Equivalent to: `rdma statistic show link [DEV/PORT]'
506+
func RdmaPortStatisticList(link *RdmaLink, port uint32) (*RdmaPortStatistic, error) {
507+
return pkgHandle.RdmaPortStatisticList(link, port)
508+
}
509+
510+
// RdmaPortStatisticList get rdma device port statistic counters
511+
// Returns rdma device port statistic counters on success or returns error
512+
// otherwise.
513+
// Equivalent to: `rdma statistic show link [DEV/PORT]'
514+
func (h *Handle) RdmaPortStatisticList(link *RdmaLink, port uint32) (*RdmaPortStatistic, error) {
515+
proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_STAT_GET)
516+
req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_REQUEST)
517+
b := make([]byte, 4)
518+
native.PutUint32(b, link.Attrs.Index)
519+
data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)
520+
req.AddData(data)
521+
522+
b = make([]byte, 4)
523+
native.PutUint32(b, port)
524+
data = nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_PORT_INDEX, b)
525+
req.AddData(data)
526+
527+
msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
528+
if err != nil {
529+
return nil, err
530+
}
531+
if len(msgs) != 1 {
532+
return nil, fmt.Errorf("No valid response from kernel")
533+
}
534+
return executeOneGetRdmaPortStatistics(msgs[0])
535+
}
536+
537+
func executeOneGetRdmaPortStatistics(data []byte) (*RdmaPortStatistic, error) {
538+
var stat RdmaPortStatistic
539+
reader := bytes.NewReader(data)
540+
for reader.Len() >= 4 {
541+
_, attrType, len, value := parseNfAttrTLV(reader)
542+
543+
switch attrType {
544+
case nl.RDMA_NLDEV_ATTR_PORT_INDEX:
545+
var Index uint32
546+
r := bytes.NewReader(value)
547+
binary.Read(r, nl.NativeEndian(), &Index)
548+
stat.PortIndex = Index
549+
case nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTERS:
550+
var err error
551+
stat.Statistics, err = parseRdmaCounters(nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY, value)
552+
if err != nil {
553+
return nil, err
554+
}
555+
}
556+
if (len % 4) != 0 {
557+
// Skip pad bytes
558+
reader.Seek(int64(4-(len%4)), seekCurrent)
559+
}
560+
}
561+
return &stat, nil
562+
}

rdma_link_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,3 +205,30 @@ func TestRdmaLinkAddAndDel(t *testing.T) {
205205

206206
checkPresence(linkName, false)
207207
}
208+
209+
func TestRdmaLinkMetrics(t *testing.T) {
210+
minKernelRequired(t, 5, 1)
211+
setupRdmaKModule(t, "rdma_rxe")
212+
if err := RdmaLinkAdd(t.Name(), "rxe", "lo"); err != nil {
213+
t.Fatal(err)
214+
}
215+
link, err := RdmaLinkByName(t.Name())
216+
if err != nil {
217+
t.Fatal(err)
218+
}
219+
defer RdmaLinkDel(t.Name())
220+
resources, err := RdmaResourceList()
221+
if err != nil {
222+
t.Fatal(err)
223+
}
224+
for _, resource := range resources {
225+
t.Logf("resource: %+v", resource)
226+
}
227+
stats, err := RdmaStatistic(link)
228+
if err != nil {
229+
t.Fatal(err)
230+
}
231+
for _, stat := range stats.RdmaPortStatistics {
232+
t.Logf("stat: %+v", stat)
233+
}
234+
}

0 commit comments

Comments
 (0)