Skip to content

Commit 2789049

Browse files
author
Adrian Cole
committed
Adds trace IDs to dependency links
TODO: make this conditional, as existing dependency linker jobs won't need IDs and keeping them might make spark jobs move more data than they currently need.
1 parent f260456 commit 2789049

File tree

3 files changed

+139
-10
lines changed

3 files changed

+139
-10
lines changed

zipkin2/src/main/java/zipkin2/DependencyLink.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
import java.io.Serializable;
1818
import java.io.StreamCorruptedException;
1919
import java.nio.charset.Charset;
20+
import java.util.ArrayList;
21+
import java.util.Collections;
22+
import java.util.List;
2023
import java.util.Locale;
2124
import zipkin2.codec.DependencyLinkBytesDecoder;
2225
import zipkin2.codec.DependencyLinkBytesEncoder;
@@ -51,13 +54,34 @@ public long errorCount() {
5154
return errorCount;
5255
}
5356

57+
/**
58+
* When not empty, all trace IDs on this link.
59+
*
60+
* <p>Note: the count of trace IDs can be less than {@link #callCount()} when a single trace
61+
* makes multiple calls across the same link.
62+
*/
63+
public List<String> callTraceIds() {
64+
return callTraceIds;
65+
}
66+
67+
/**
68+
* When {@link #callTraceIds()} is not empty, all trace IDs with errors on this link.
69+
*
70+
* <p>Note: the count of trace IDs can be less than {@link #errorCount()} when a single trace
71+
* makes multiple calls across the same link.
72+
*/
73+
public List<String> errorTraceIds() {
74+
return errorTraceIds;
75+
}
76+
5477
public Builder toBuilder() {
5578
return new Builder(this);
5679
}
5780

5881
public static final class Builder {
5982
String parent, child;
6083
long callCount, errorCount;
84+
List<String> callTraceIds, errorTraceIds;
6185

6286
Builder() {
6387
}
@@ -67,6 +91,11 @@ public static final class Builder {
6791
this.child = source.child;
6892
this.callCount = source.callCount;
6993
this.errorCount = source.errorCount;
94+
this.callTraceIds =
95+
source.callTraceIds.isEmpty() ? null : new ArrayList<>(source.callTraceIds);
96+
this.errorTraceIds =
97+
source.errorTraceIds.isEmpty() ? null : new ArrayList<>(source.errorTraceIds);
98+
7099
}
71100

72101
public Builder parent(String parent) {
@@ -91,6 +120,16 @@ public Builder errorCount(long errorCount) {
91120
return this;
92121
}
93122

123+
public Builder callTraceIds(List<String> callTraceIds) {
124+
this.callTraceIds = callTraceIds;
125+
return this;
126+
}
127+
128+
public Builder errorTraceIds(List<String> errorTraceIds) {
129+
this.errorTraceIds = errorTraceIds;
130+
return this;
131+
}
132+
94133
public DependencyLink build() {
95134
String missing = "";
96135
if (parent == null) missing += " parent";
@@ -108,12 +147,17 @@ public DependencyLink build() {
108147
// See https://github.com/openzipkin/zipkin/issues/1879
109148
final String parent, child;
110149
final long callCount, errorCount;
150+
final List<String> callTraceIds, errorTraceIds;
111151

112152
DependencyLink(Builder builder) {
113153
parent = builder.parent;
114154
child = builder.child;
115155
callCount = builder.callCount;
116156
errorCount = builder.errorCount;
157+
callTraceIds = builder.callTraceIds == null ? Collections.emptyList()
158+
: new ArrayList<>(builder.callTraceIds);
159+
errorTraceIds = builder.errorTraceIds == null ? Collections.emptyList()
160+
: new ArrayList<>(builder.errorTraceIds);
117161
}
118162

119163
@Override public boolean equals(Object o) {

zipkin2/src/main/java/zipkin2/internal/DependencyLinker.java

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
import java.util.ArrayList;
1717
import java.util.Iterator;
1818
import java.util.LinkedHashMap;
19+
import java.util.LinkedHashSet;
1920
import java.util.List;
2021
import java.util.Map;
22+
import java.util.Set;
2123
import java.util.logging.Logger;
2224
import zipkin2.DependencyLink;
2325
import zipkin2.Span;
@@ -38,6 +40,8 @@ public final class DependencyLinker {
3840
private final Logger logger;
3941
private final Map<Pair, Long> callCounts = new LinkedHashMap<>();
4042
private final Map<Pair, Long> errorCounts = new LinkedHashMap<>();
43+
private final Map<Pair, Set<String>> callTraceIds = new LinkedHashMap<>();
44+
private final Map<Pair, Set<String>> errorTraceIds = new LinkedHashMap<>();
4145

4246
public DependencyLinker() {
4347
this(Logger.getLogger(DependencyLinker.class.getName()));
@@ -84,10 +88,11 @@ public DependencyLinker putTrace(Iterator<Span> spans) {
8488
if (!spans.hasNext()) return this;
8589
Span first = spans.next();
8690
list.add(first);
87-
if (logger.isLoggable(FINE)) logger.fine("linking trace " + first.traceId());
91+
String traceId = first.traceId();
92+
if (logger.isLoggable(FINE)) logger.fine("linking trace " + traceId);
8893

8994
// Build a tree based on spanId and parentId values
90-
Node.TreeBuilder<Span> builder = new Node.TreeBuilder<>(logger, MERGE_RPC, first.traceId());
95+
Node.TreeBuilder<Span> builder = new Node.TreeBuilder<>(logger, MERGE_RPC, traceId);
9196
builder.addNode(first.parentId(), first.id(), first);
9297
while (spans.hasNext()) {
9398
Span next = spans.next();
@@ -156,7 +161,7 @@ public DependencyLinker putTrace(Iterator<Span> spans) {
156161
if (parent == null || child == null) {
157162
logger.fine("cannot link messaging span to its broker; skipping");
158163
} else {
159-
addLink(parent, child, isError);
164+
addLink(traceId, parent, child, isError);
160165
}
161166
continue;
162167
}
@@ -172,7 +177,7 @@ public DependencyLinker putTrace(Iterator<Span> spans) {
172177
// Check for this and backfill a link from the nearest remote to that service as necessary.
173178
if (kind == Kind.CLIENT && serviceName != null && !rpcAncestorName.equals(serviceName)) {
174179
logger.fine("detected missing link to client span");
175-
addLink(rpcAncestorName, serviceName, false); // we don't know if there's an error here
180+
addLink(traceId, rpcAncestorName, serviceName, false); // we don't know if there's an error here
176181
}
177182

178183
// Local spans may be between the current node and its remote parent
@@ -191,7 +196,7 @@ public DependencyLinker putTrace(Iterator<Span> spans) {
191196
continue;
192197
}
193198

194-
addLink(parent, child, isError);
199+
addLink(traceId, parent, child, isError);
195200
}
196201
return this;
197202
}
@@ -209,17 +214,19 @@ Span findRpcAncestor(Node<Span> current) {
209214
return null;
210215
}
211216

212-
void addLink(String parent, String child, boolean isError) {
217+
void addLink(String traceId, String parent, String child, boolean isError) {
213218
if (logger.isLoggable(FINE)) {
214219
logger.fine("incrementing " + (isError ? "error " : "") + "link " + parent + " -> " + child);
215220
}
216221
Pair key = new Pair(parent, child);
222+
add(callTraceIds, key, traceId);
217223
if (callCounts.containsKey(key)) {
218224
callCounts.put(key, callCounts.get(key) + 1);
219225
} else {
220226
callCounts.put(key, 1L);
221227
}
222228
if (!isError) return;
229+
add(errorTraceIds, key, traceId);
223230
if (errorCounts.containsKey(key)) {
224231
errorCounts.put(key, errorCounts.get(key) + 1);
225232
} else {
@@ -228,29 +235,41 @@ void addLink(String parent, String child, boolean isError) {
228235
}
229236

230237
public List<DependencyLink> link() {
231-
return link(callCounts, errorCounts);
238+
return link(callCounts, errorCounts, callTraceIds, errorTraceIds);
232239
}
233240

234241
/** links are merged by mapping to parent/child and summing corresponding links */
235242
public static List<DependencyLink> merge(Iterable<DependencyLink> in) {
236243
Map<Pair, Long> callCounts = new LinkedHashMap<>();
237244
Map<Pair, Long> errorCounts = new LinkedHashMap<>();
245+
Map<Pair, Set<String>> callTraceIds = new LinkedHashMap<>();
246+
Map<Pair, Set<String>> errorTraceIds = new LinkedHashMap<>();
238247

239248
for (DependencyLink link : in) {
240249
Pair parentChild = new Pair(link.parent(), link.child());
241250
long callCount = callCounts.containsKey(parentChild) ? callCounts.get(parentChild) : 0L;
242251
callCount += link.callCount();
243252
callCounts.put(parentChild, callCount);
253+
for (int i = 0, length = link.callTraceIds().size(); i < length; i++) {
254+
add(callTraceIds, parentChild, link.callTraceIds().get(i));
255+
}
244256
long errorCount = errorCounts.containsKey(parentChild) ? errorCounts.get(parentChild) : 0L;
245257
errorCount += link.errorCount();
246258
errorCounts.put(parentChild, errorCount);
259+
for (int i = 0, length = link.errorTraceIds().size(); i < length; i++) {
260+
add(callTraceIds, parentChild, link.errorTraceIds().get(i));
261+
}
247262
}
248263

249-
return link(callCounts, errorCounts);
264+
return link(callCounts, errorCounts, callTraceIds, errorTraceIds);
250265
}
251266

252-
static List<DependencyLink> link(Map<Pair, Long> callCounts,
253-
Map<Pair, Long> errorCounts) {
267+
static List<DependencyLink> link(
268+
Map<Pair, Long> callCounts,
269+
Map<Pair, Long> errorCounts,
270+
Map<Pair, Set<String>> callTraceIds,
271+
Map<Pair, Set<String>> errorTraceIds
272+
) {
254273
List<DependencyLink> result = new ArrayList<>(callCounts.size());
255274
for (Map.Entry<Pair, Long> entry : callCounts.entrySet()) {
256275
Pair parentChild = entry.getKey();
@@ -259,6 +278,8 @@ static List<DependencyLink> link(Map<Pair, Long> callCounts,
259278
.child(parentChild.right)
260279
.callCount(entry.getValue())
261280
.errorCount(errorCounts.containsKey(parentChild) ? errorCounts.get(parentChild) : 0L)
281+
.callTraceIds(maybeGet(callTraceIds, parentChild))
282+
.errorTraceIds(maybeGet(errorTraceIds, parentChild))
262283
.build());
263284
}
264285
return result;
@@ -290,4 +311,19 @@ public int hashCode() {
290311
return h$;
291312
}
292313
}
314+
315+
static void add(Map<Pair, Set<String>> linkToTraceIds, Pair link, String traceId) {
316+
if (linkToTraceIds.containsKey(link)) {
317+
linkToTraceIds.get(link).add(traceId);
318+
} else {
319+
LinkedHashSet<String> traceIds = new LinkedHashSet<>();
320+
traceIds.add(traceId);
321+
linkToTraceIds.put(link, traceIds);
322+
}
323+
}
324+
325+
static List<String> maybeGet(Map<Pair, Set<String>> linkToTraceIds, Pair link) {
326+
Set<String> result = linkToTraceIds.get(link);
327+
return result == null ? null : new ArrayList<>(result);
328+
}
293329
}

zipkin2/src/test/java/zipkin2/internal/DependencyLinkerTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import static java.util.Arrays.asList;
2929
import static org.assertj.core.api.Assertions.assertThat;
30+
import static org.assertj.core.api.Assertions.tuple;
3031

3132
public class DependencyLinkerTest {
3233
static final List<Span> TRACE = asList(
@@ -36,6 +37,7 @@ public class DependencyLinkerTest {
3637
.toBuilder().shared(true).build(),
3738
span2("a", "b", "c", Kind.CLIENT, "app", "db", true)
3839
);
40+
static final List<String> TRACE_ID = asList(TRACE.get(0).traceId());
3941

4042
List<String> messages = new ArrayList<>();
4143

@@ -61,6 +63,33 @@ public void linksSpans() {
6163
DependencyLink.newBuilder().parent("web").child("app").callCount(1L).build(),
6264
DependencyLink.newBuilder().parent("app").child("db").callCount(1L).errorCount(1L).build()
6365
);
66+
67+
assertThat(new DependencyLinker().putTrace(TRACE.iterator()).link())
68+
.extracting("parent", "child", "callTraceIds", "errorTraceIds")
69+
.containsExactly(
70+
tuple("web", "app", TRACE_ID, Collections.emptyList()),
71+
tuple("app", "db", TRACE_ID, TRACE_ID)
72+
);
73+
}
74+
75+
@Test
76+
public void differentiatesErrorTraceIdFromNonError() {
77+
DependencyLinker linker = new DependencyLinker();
78+
79+
linker.putTrace(asList(
80+
span2("a", "a", "b", Kind.CLIENT, "web", "app", false)
81+
).iterator());
82+
83+
linker.putTrace(asList(
84+
span2("b", "a", "b", Kind.CLIENT, "app", "db", true)
85+
).iterator());
86+
87+
assertThat(linker.link())
88+
.extracting("parent", "child", "callTraceIds", "errorTraceIds")
89+
.containsExactly(
90+
tuple("web", "app", asList("000000000000000a"), Collections.emptyList()),
91+
tuple("app", "db", asList("000000000000000b"), asList("000000000000000b"))
92+
);
6493
}
6594

6695
/**
@@ -345,6 +374,13 @@ public void singleHostSpans_multipleChildren() {
345374
.errorCount(1L)
346375
.build()
347376
);
377+
378+
// only one trace ID result eventhough there are multiple calls
379+
assertThat(new DependencyLinker().putTrace(trace.iterator()).link())
380+
.extracting("parent", "child", "callTraceIds", "errorTraceIds")
381+
.containsExactly(
382+
tuple("client", "server", TRACE_ID, TRACE_ID)
383+
);
348384
}
349385

350386
@Test
@@ -449,6 +485,19 @@ public void linksLoopbackSpans() {
449485
DependencyLink.newBuilder().parent("service").child("service").callCount(1L).build()
450486
);
451487
}
488+
489+
// now re-use the same instance and verify trace IDs are correct
490+
DependencyLinker linker = new DependencyLinker();
491+
for (Span span : validRootSpans) {
492+
linker.putTrace(asList(span).iterator());
493+
}
494+
495+
// shows the correct trace IDs
496+
assertThat(linker.link())
497+
.extracting("parent", "child", "callTraceIds")
498+
.containsExactly(
499+
tuple("service", "service", asList("000000000000000a", "000000000000000b"))
500+
);
452501
}
453502

454503
@Test

0 commit comments

Comments
 (0)