Skip to content

Commit fc41b95

Browse files
committed
fix(otel): prevent unpaired Unicode surrogates from causing insert errors
1 parent 129dc02 commit fc41b95

File tree

2 files changed

+102
-31
lines changed

2 files changed

+102
-31
lines changed

apps/webapp/app/v3/otlpExporter.server.ts

Lines changed: 88 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ import type {
2929
import { startSpan } from "./tracing.server";
3030
import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server";
3131
import { env } from "~/env.server";
32+
import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
33+
import { singleton } from "~/utils/singleton";
3234

3335
class OTLPExporter {
3436
private _tracer: Tracer;
@@ -221,18 +223,16 @@ function convertLogsToCreateableEvents(
221223
);
222224

223225
const properties =
224-
convertKeyValueItemsToMap(
225-
truncateAttributes(log.attributes ?? [], spanAttributeValueLengthLimit),
226-
[],
227-
undefined,
228-
[
226+
truncateAttributes(
227+
convertKeyValueItemsToMap(log.attributes ?? [], [], undefined, [
229228
SemanticInternalAttributes.USAGE,
230229
SemanticInternalAttributes.SPAN,
231230
SemanticInternalAttributes.METADATA,
232231
SemanticInternalAttributes.STYLE,
233232
SemanticInternalAttributes.METRIC_EVENTS,
234233
SemanticInternalAttributes.TRIGGER,
235-
]
234+
]),
235+
spanAttributeValueLengthLimit
236236
) ?? {};
237237

238238
return {
@@ -304,18 +304,16 @@ function convertSpansToCreateableEvents(
304304
);
305305

306306
const properties =
307-
convertKeyValueItemsToMap(
308-
truncateAttributes(span.attributes ?? [], spanAttributeValueLengthLimit),
309-
[],
310-
undefined,
311-
[
307+
truncateAttributes(
308+
convertKeyValueItemsToMap(span.attributes ?? [], [], undefined, [
312309
SemanticInternalAttributes.USAGE,
313310
SemanticInternalAttributes.SPAN,
314311
SemanticInternalAttributes.METADATA,
315312
SemanticInternalAttributes.STYLE,
316313
SemanticInternalAttributes.METRIC_EVENTS,
317314
SemanticInternalAttributes.TRIGGER,
318-
]
315+
]),
316+
spanAttributeValueLengthLimit
319317
) ?? {};
320318

321319
return {
@@ -774,24 +772,83 @@ function binaryToHex(buffer: Buffer | string | undefined): string | undefined {
774772
return Buffer.from(Array.from(buffer)).toString("hex");
775773
}
776774

777-
function truncateAttributes(attributes: KeyValue[], maximumLength: number = 1024): KeyValue[] {
778-
return attributes.map((attribute) => {
779-
return isStringValue(attribute.value)
780-
? {
781-
key: attribute.key,
782-
value: {
783-
stringValue: attribute.value.stringValue.slice(0, maximumLength),
784-
},
785-
}
786-
: attribute;
787-
});
775+
function truncateAttributes(
776+
attributes: Record<string, string | number | boolean | undefined> | undefined,
777+
maximumLength: number = 1024
778+
): Record<string, string | number | boolean | undefined> | undefined {
779+
if (!attributes) return undefined;
780+
781+
const truncatedAttributes: Record<string, string | number | boolean | undefined> = {};
782+
783+
for (const [key, value] of Object.entries(attributes)) {
784+
if (!key) continue;
785+
786+
if (typeof value === "string") {
787+
truncatedAttributes[key] = truncateAndDetectUnpairedSurrogate(value, maximumLength);
788+
} else {
789+
truncatedAttributes[key] = value;
790+
}
791+
}
792+
793+
return truncatedAttributes;
788794
}
789795

790-
export const otlpExporter = new OTLPExporter(
791-
eventRepository,
792-
clickhouseEventRepository,
793-
process.env.OTLP_EXPORTER_VERBOSE === "1",
794-
process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT
795-
? parseInt(process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT, 10)
796-
: 8192
797-
);
796+
function truncateAndDetectUnpairedSurrogate(str: string, maximumLength: number): string {
797+
const truncatedString = smartTruncateString(str, maximumLength);
798+
799+
if (hasUnpairedSurrogateAtEnd(truncatedString)) {
800+
return smartTruncateString(truncatedString, [...truncatedString].length - 1);
801+
}
802+
803+
return truncatedString;
804+
}
805+
806+
const ASCII_ONLY_REGEX = /^[\x00-\x7F]*$/;
807+
808+
function smartTruncateString(str: string, maximumLength: number): string {
809+
if (!str) return "";
810+
if (str.length <= maximumLength) return str;
811+
812+
if (ASCII_ONLY_REGEX.test(str)) {
813+
return str.slice(0, maximumLength);
814+
}
815+
816+
return [...str].slice(0, maximumLength).join("");
817+
}
818+
819+
function hasUnpairedSurrogateAtEnd(str: string): boolean {
820+
if (str.length === 0) return false;
821+
822+
const lastCode = str.charCodeAt(str.length - 1);
823+
824+
// Check if last character is an unpaired high surrogate
825+
if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
826+
return true; // High surrogate at end = unpaired
827+
}
828+
829+
// Check if last character is an unpaired low surrogate
830+
if (lastCode >= 0xdc00 && lastCode <= 0xdfff) {
831+
// Low surrogate is only valid if preceded by high surrogate
832+
if (str.length === 1) return true; // Single low surrogate
833+
834+
const secondLastCode = str.charCodeAt(str.length - 2);
835+
if (secondLastCode < 0xd800 || secondLastCode > 0xdbff) {
836+
return true; // Low surrogate not preceded by high surrogate
837+
}
838+
}
839+
840+
return false;
841+
}
842+
843+
export const otlpExporter = singleton("otlpExporter", initializeOTLPExporter);
844+
845+
function initializeOTLPExporter() {
846+
return new OTLPExporter(
847+
eventRepository,
848+
clickhouseEventRepository,
849+
process.env.OTLP_EXPORTER_VERBOSE === "1",
850+
process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT
851+
? parseInt(process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT, 10)
852+
: 8192
853+
);
854+
}

references/hello-world/src/trigger/telemetry.ts

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,20 @@ export const taskWithChildTasks = task({
5151
},
5252
});
5353

54+
export const taskWithBadLogString = task({
55+
id: "otel/task-with-bad-log-string",
56+
run: async (payload: any, { ctx }) => {
57+
logger.log("Hello, world!", {
58+
myString: "👋🏽 I’m Shelby, of Defense.\n\n𝐋𝐞𝐭'𝐬 𝐛𝐮𝐢𝐥𝐝 𝐭𝐡𝐞 \ud835",
59+
});
60+
61+
logger.log("Hello, world!", {
62+
myString:
63+
"👋🏽 I’m Shelby, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, MIT-trained mathematician, and AI researcher, engineer, and speaker.\n\nI drive clarity, vision, and execution at the frontier of AI, empowering teams to build breakthrough technologies with real-world, enterprise impact. 💥\n\n🔹 35+ influential AI research publications across AI agents, LLMs, SLMs, and ML (see 𝘗𝘶𝘣𝘭𝘪𝘤𝘢𝘵𝘪𝘰𝘯𝘴 below)\n🔹 8+ years developing applied AI for Fortune 500 use cases\n🔹 10+ years hands-on engineering • 16+ years teaching & speaking with clarity\n🔹 Featured in VentureBeat, ZDNET, and more (see 𝘔𝘦𝘥𝘪𝘢 𝘊𝘰𝘷𝘦𝘳𝘢𝘨𝘦 below)\n🔹 30+ AI keynotes, talks, podcasts, and panels (see 𝘒𝘦𝘺𝘯𝘰𝘵𝘦𝘴 below)\n\nCurrently, I lead and manage a growing team of AI researchers and engineers at Salesforce. We push the boundaries of agentic AI, multi-agent systems, on-device AI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research and engineering at Intel, IBM Research, MITRE, and the Department of Defense.\n\n𝐋𝐞𝐭'𝐬 𝐛𝐮𝐢𝐥𝐝 𝐭𝐡𝐞 \ud835",
64+
});
65+
},
66+
});
67+
5468
export const generateLogsParentTask = task({
5569
id: "otel/generate-logs-parent",
5670
run: async (payload: any) => {

0 commit comments

Comments
 (0)