-
Notifications
You must be signed in to change notification settings - Fork 692
WIP: Blocks partial results #988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cd0f870
11a7a58
2571277
dd7d40e
74a198d
005981f
1729d48
985022f
1bb3dd8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -5,12 +5,15 @@ import ( | |||||
| "context" | ||||||
| "encoding/binary" | ||||||
| "encoding/hex" | ||||||
| "fmt" | ||||||
| "io" | ||||||
| "io/ioutil" | ||||||
| "net/http" | ||||||
| "strings" | ||||||
|
|
||||||
| "github.com/go-kit/kit/log" | ||||||
| "github.com/golang/protobuf/proto" | ||||||
| "github.com/grafana/tempo/pkg/tempopb" | ||||||
| "github.com/opentracing/opentracing-go" | ||||||
| "github.com/pkg/errors" | ||||||
| "github.com/weaveworks/common/user" | ||||||
|
|
@@ -25,6 +28,9 @@ const ( | |||||
|
|
||||||
| querierPrefix = "/querier" | ||||||
| queryDelimiter = "?" | ||||||
|
|
||||||
| // todo: make configurable | ||||||
| maxBlockErrCount = 5 | ||||||
| ) | ||||||
|
|
||||||
| func ShardingWare(queryShards int, logger log.Logger) Middleware { | ||||||
|
|
@@ -154,25 +160,36 @@ func mergeResponses(ctx context.Context, rrs []RequestResponse) (*http.Response, | |||||
|
|
||||||
| var errCode = http.StatusOK | ||||||
| var errBody io.ReadCloser | ||||||
| var combinedTrace []byte | ||||||
| var shardMissCount = 0 | ||||||
| var combinedTrace *tempopb.Trace | ||||||
| var combinedTraceBytes []byte | ||||||
| var shardMissCount, totalBlockErrCount int | ||||||
| for _, rr := range rrs { | ||||||
| if rr.Response.StatusCode == http.StatusOK { | ||||||
| partialContent := rr.Response.StatusCode == http.StatusPartialContent | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: renaming this to |
||||||
| if rr.Response.StatusCode == http.StatusOK || partialContent { | ||||||
| body, err := io.ReadAll(rr.Response.Body) | ||||||
| rr.Response.Body.Close() | ||||||
| if err != nil { | ||||||
| return nil, errors.Wrap(err, "error reading response body at query frontend") | ||||||
| } | ||||||
|
Comment on lines
169
to
173
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if we should fail the entire request here. If we can't read the body from one of the requests, isn't this also a partial result? |
||||||
|
|
||||||
| if len(combinedTrace) == 0 { | ||||||
| combinedTrace = body | ||||||
| } else { | ||||||
| combinedTrace, _, err = model.CombineTraceBytes(combinedTrace, body, model.TracePBEncoding, model.TracePBEncoding) | ||||||
| if err != nil { | ||||||
| // will result in a 500 internal server error | ||||||
| return nil, errors.Wrap(err, "error combining traces at query frontend") | ||||||
| var resp tempopb.TraceByIDResponse | ||||||
| err = proto.Unmarshal(body, &resp) | ||||||
|
Comment on lines
+175
to
+176
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The querier only seems to marshal the trace part of `TraceByIDResponse`. How can we unmarshal a full |
||||||
| if err != nil { | ||||||
| return nil, errors.Wrap(err, "error reading response body at query frontend") | ||||||
| } | ||||||
|
|
||||||
| if partialContent { | ||||||
| totalBlockErrCount += int(resp.BlockErrCount) | ||||||
| if totalBlockErrCount > maxBlockErrCount { | ||||||
| return nil, fmt.Errorf("too many block queries failed (max %d)", maxBlockErrCount) | ||||||
| } | ||||||
|
Comment on lines
+183
to
185
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if it's useful to fail on |
||||||
| } | ||||||
|
|
||||||
| if combinedTrace == nil { | ||||||
| combinedTrace = resp.Trace | ||||||
| } else { | ||||||
| combinedTrace, _, _, _ = model.CombineTraceProtos(combinedTrace, resp.Trace) | ||||||
| } | ||||||
| } else if rr.Response.StatusCode != http.StatusNotFound { | ||||||
| errCode = rr.Response.StatusCode | ||||||
| errBody = rr.Response.Body | ||||||
|
|
@@ -181,6 +198,14 @@ func mergeResponses(ctx context.Context, rrs []RequestResponse) (*http.Response, | |||||
| } | ||||||
| } | ||||||
|
|
||||||
| if combinedTrace != nil { | ||||||
| var err error | ||||||
| combinedTraceBytes, err = combinedTrace.Marshal() | ||||||
| if err != nil { | ||||||
| return nil, errors.Wrap(err, "error marshaling combined trace at query frontend") | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if shardMissCount == len(rrs) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can move this block above |
||||||
| return &http.Response{ | ||||||
| StatusCode: http.StatusNotFound, | ||||||
|
|
@@ -190,12 +215,18 @@ func mergeResponses(ctx context.Context, rrs []RequestResponse) (*http.Response, | |||||
| } | ||||||
|
|
||||||
| if errCode == http.StatusOK { | ||||||
| statusCode := http.StatusOK | ||||||
| if totalBlockErrCount > 0 { | ||||||
| // If there are failed blocks, and we haven't returned with an error, | ||||||
| // signal the upstream that the result contains partial results. | ||||||
| statusCode = http.StatusPartialContent | ||||||
| } | ||||||
| return &http.Response{ | ||||||
| StatusCode: http.StatusOK, | ||||||
| Body: ioutil.NopCloser(bytes.NewReader(combinedTrace)), | ||||||
| StatusCode: statusCode, | ||||||
| Body: ioutil.NopCloser(bytes.NewReader(combinedTraceBytes)), | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // ContentLength header is added to log the size of response in the Tripperware in frontend.go | ||||||
| // This could be overwritten if the query client and Tempo negotiate compression | ||||||
| ContentLength: int64(len(combinedTrace)), | ||||||
| ContentLength: int64(len(combinedTraceBytes)), | ||||||
| Header: http.Header{}, | ||||||
| }, nil | ||||||
| } | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,9 @@ func (q *Querier) TraceByIDHandler(w http.ResponseWriter, r *http.Request) { | |
| http.Error(w, err.Error(), http.StatusInternalServerError) | ||
| return | ||
| } | ||
| if resp.BlockErrCount > 0 { // If some blocks failed, return 206 | ||
| w.WriteHeader(http.StatusPartialContent) | ||
| } | ||
| _, err = w.Write(b) | ||
| if err != nil { | ||
| http.Error(w, err.Error(), http.StatusInternalServerError) | ||
|
|
@@ -94,7 +97,7 @@ func (q *Querier) TraceByIDHandler(w http.ResponseWriter, r *http.Request) { | |
|
|
||
| span.SetTag("response marshalling format", util.JSONTypeHeaderValue) | ||
| marshaller := &jsonpb.Marshaler{} | ||
| err = marshaller.Marshal(w, resp.Trace) | ||
| err = marshaller.Marshal(w, resp) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is a difference between our responses depending on whether the caller accepts protobuf or not: if the caller accepts protobuf we return only the `Trace`. But if the caller requests something else (aka JSON) we marshal the entire `TraceByIDResponse`: {
"trace": ...,
"blockErrCount": ...
}
Why not also return |
||
| if err != nil { | ||
| http.Error(w, err.Error(), http.StatusInternalServerError) | ||
| return | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -185,13 +185,19 @@ func (q *Querier) FindTraceByID(ctx context.Context, req *tempopb.TraceByIDReque | |
| ot_log.Int("combinedTraces", traceCountTotal)) | ||
| } | ||
|
|
||
| var blockErrsCount uint32 | ||
| if req.QueryMode == QueryModeBlocks || req.QueryMode == QueryModeAll { | ||
| span.LogFields(ot_log.String("msg", "searching store")) | ||
| partialTraces, dataEncodings, err := q.store.Find(opentracing.ContextWithSpan(ctx, span), userID, req.TraceID, req.BlockStart, req.BlockEnd) | ||
| partialTraces, dataEncodings, blockErrs, err := q.store.Find(opentracing.ContextWithSpan(ctx, span), userID, req.TraceID, req.BlockStart, req.BlockEnd) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't use `blockErrs` in a meaningful way (we only use the count, not the errors themselves). Maybe we should only return an int instead of |
||
| // err contains unrecoverable errors | ||
| // errs querying blocks are contained in blockErrs | ||
|
Comment on lines
+192
to
+193
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add these comments to |
||
| if err != nil { | ||
| // todo: change err log to specify what failed | ||
| return nil, errors.Wrap(err, "error querying store in Querier.FindTraceByID") | ||
| } | ||
|
|
||
| blockErrsCount = uint32(len(blockErrs)) | ||
|
|
||
| span.LogFields(ot_log.String("msg", "done searching store")) | ||
|
|
||
| if len(partialTraces) != 0 { | ||
|
|
@@ -226,7 +232,8 @@ func (q *Querier) FindTraceByID(ctx context.Context, req *tempopb.TraceByIDReque | |
| } | ||
|
|
||
| return &tempopb.TraceByIDResponse{ | ||
| Trace: completeTrace, | ||
| Trace: completeTrace, | ||
| BlockErrCount: blockErrsCount, | ||
| }, nil | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: we don't use `combinedTraceBytes` in this for-loop yet, so we can move the declaration further down in this function. This makes the code a bit easier to read as we don't have to worry about this variable yet.