diff --git a/js/dev/server.ts b/js/dev/server.ts
index aee357ad5..4e4daaf4d 100644
--- a/js/dev/server.ts
+++ b/js/dev/server.ts
@@ -117,7 +117,7 @@ export function runDevServer(
 
         evalDefs[name] = {
           parameters,
-          scores: evaluator.scores.map((score, idx) => ({
+          scores: (evaluator.scores ?? []).map((score, idx) => ({
             name: scorerName(score, idx),
           })),
         };
@@ -209,7 +209,7 @@ export function runDevServer(
           {
             ...evaluator,
             data: evalData.data,
-            scores: evaluator.scores.concat(
+            scores: (evaluator.scores ?? []).concat(
               scores?.map((score) =>
                 makeScorer(
                   state,
diff --git a/js/src/cli/functions/infer-source.ts b/js/src/cli/functions/infer-source.ts
index 179b5c50d..0a759b422 100644
--- a/js/src/cli/functions/infer-source.ts
+++ b/js/src/cli/functions/infer-source.ts
@@ -85,7 +85,7 @@ export async function findCodeDefinition({
       fn =
         location.position.type === "task"
           ? evaluator.task
-          : evaluator.scores[location.position.index];
+          : (evaluator.scores ?? [])[location.position.index];
     }
   } else if (location.type === "function") {
     fn = outFileModule.functions[location.index].handler;
diff --git a/js/src/cli/functions/upload.ts b/js/src/cli/functions/upload.ts
index ce67f44d9..1745b5329 100644
--- a/js/src/cli/functions/upload.ts
+++ b/js/src/cli/functions/upload.ts
@@ -180,23 +180,25 @@ export async function uploadHandleBundles({
           function_type: "task",
           origin,
         },
-        ...evaluator.evaluator.scores.map((score, i): BundledFunctionSpec => {
-          const name = scorerName(score, i);
-          return {
-            ...baseInfo,
-            // There is a very small chance that someone names a function with the same convention, but
-            // let's assume it's low enough that it doesn't matter.
-            ...formatNameAndSlug(["eval", namePrefix, "scorer", name]),
-            description: `Score ${name} for eval ${namePrefix}`,
-            location: {
-              type: "experiment",
-              eval_name: evaluator.evaluator.evalName,
-              position: { type: "scorer", index: i },
-            },
-            function_type: "scorer",
-            origin,
-          };
-        }),
+        ...(evaluator.evaluator.scores ?? []).map(
+          (score, i): BundledFunctionSpec => {
+            const name = scorerName(score, i);
+            return {
+              ...baseInfo,
+              // There is a very small chance that someone names a function with the same convention, but
+              // let's assume it's low enough that it doesn't matter.
+              ...formatNameAndSlug(["eval", namePrefix, "scorer", name]),
+              description: `Score ${name} for eval ${namePrefix}`,
+              location: {
+                type: "experiment",
+                eval_name: evaluator.evaluator.evalName,
+                position: { type: "scorer", index: i },
+              },
+              function_type: "scorer",
+              origin,
+            };
+          },
+        ),
       ];
 
       bundleSpecs.push(...fileSpecs);
@@ -219,7 +221,7 @@ export async function uploadHandleBundles({
                   serializeRemoteEvalParametersContainer(resolvedParameters),
               }
             : {}),
-          scores: evaluator.evaluator.scores.map((score, i) => ({
+          scores: (evaluator.evaluator.scores ?? []).map((score, i) => ({
             name: scorerName(score, i),
           })),
         };
diff --git a/js/src/exports.ts b/js/src/exports.ts
index 6eb2b5c1f..352d5a80e 100644
--- a/js/src/exports.ts
+++ b/js/src/exports.ts
@@ -185,6 +185,7 @@ export type {
   EvalResult,
   EvalScorerArgs,
   EvalScorer,
+  EvalClassifier,
   EvaluatorDef,
   EvaluatorFile,
   ReporterBody,
diff --git a/js/src/framework.test.ts b/js/src/framework.test.ts
index bd9381342..45989fcdf 100644
--- a/js/src/framework.test.ts
+++ b/js/src/framework.test.ts
@@ -179,7 +179,6 @@ describe("runEvaluator", () => {
         new NoopProgressReporter(),
         [],
         undefined,
-        true,
       );
 
       expect(out.results.every((r) => Object.keys(r.scores).length === 0)).toBe(
@@ -207,7 +206,6 @@ describe("runEvaluator", () => {
             new NoopProgressReporter(),
             [],
             undefined,
-            true,
           );
 
           expect(
@@ -237,7 +235,6 @@ describe("runEvaluator", () => {
             new NoopProgressReporter(),
             [],
             undefined,
-            true,
           );
 
           expect(
@@ -271,7 +268,6 @@ describe("runEvaluator", () => {
             new NoopProgressReporter(),
             [],
             undefined,
-            true,
           );
 
           expect(
@@ -297,7 +293,6 @@ describe("runEvaluator", () => {
             new NoopProgressReporter(),
             [],
             undefined,
-            true,
           );
 
           expect(
@@ -477,7 +472,7 @@ test("trialIndex is passed to task", async () => {
   // All results should be correct
   results.forEach((result) => {
     expect(result.input).toBe(1);
-    expect(result.expected).toBe(2);
+    expect("expected" in result ? result.expected : undefined).toBe(2);
     expect(result.output).toBe(2);
     expect(result.error).toBeUndefined();
   });
@@ -575,9 +570,8 @@ test("Eval with noSendLogs: true runs locally without creating experiment", asyn
 
 test("Eval with returnResults: false produces empty results but valid summary", async () => {
   const result = await Eval(
-    "test-no-results",
+    "test-no-results-project",
     {
-      projectName: "test-no-results-project",
       data: [
         { input: "hello", expected: "hello world" },
         { input: "test", expected: "test world" },
@@ -615,9 +609,8 @@ test("Eval with returnResults: false produces empty results but valid summary",
 
 test("Eval with returnResults: true collects all results", async () => {
   const result = await Eval(
-    "test-with-results",
+    "test-with-results-project",
     {
-      projectName: "test-with-results-project",
       data: [
         { input: "hello", expected: "hello world" },
         { input: "test", expected: "test world" },
@@ -668,7 +661,7 @@ test("tags can be appended and logged to root span", async () => {
       evalName: "js-tags-append",
       data: [{ input: "hello", expected: "hello world", tags: initialTags }],
       task: (input, hooks) => {
-        for (const t of appendedTags) hooks.tags.push(t);
+        for (const t of appendedTags) hooks.tags!.push(t);
         return input;
       },
       scores: [() => ({ name: "simple_scorer", score: 0.8 })],
@@ -825,7 +818,7 @@ test("scorer spans have purpose='scorer' attribute", async () => {
       data: [{ input: "hello", expected: "hello" }],
       task: async (input: string) => input,
       scores: [
-        (args: { input: string; output: string; expected: string }) => ({
+        (args: { output: string; expected?: string }) => ({
           name: "simple_scorer",
           score: args.output === args.expected ? 1 : 0,
         }),
@@ -972,11 +965,12 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
         {
           name: "test-prompt",
           slug: "test-prompt",
           metadata,
-        },
+        } as any,
       );
 
       const mockProjectMap = {
@@ -1001,10 +995,8 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
-        {
-          name: "test-prompt",
-          slug: "test-prompt",
-        },
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        { name: "test-prompt", slug: "test-prompt" } as any,
       );
 
       const mockProjectMap = {
@@ -1027,11 +1019,12 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
         {
           name: "test-prompt",
           slug: "test-prompt",
           environments: ["production"],
-        },
+        } as any,
       );
 
       const mockProjectMap = {
@@ -1054,11 +1047,12 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
         {
           name: "test-prompt",
           slug: "test-prompt",
           environments: ["staging", "production"],
-        },
+        } as any,
       );
 
       const mockProjectMap = {
@@ -1084,10 +1078,8 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
-        {
-          name: "test-prompt",
-          slug: "test-prompt",
-        },
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        { name: "test-prompt", slug: "test-prompt" } as any,
       );
 
       const mockProjectMap = {
@@ -1130,11 +1122,8 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
-        {
-          name: "test-prompt",
-          slug: "test-prompt",
-          tags,
-        },
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        { name: "test-prompt", slug: "test-prompt", tags } as any,
       );
 
       const mockProjectMap = {
@@ -1159,10 +1148,8 @@ describe("framework2 metadata support", () => {
           options: { model: "gpt-4" },
         },
         [],
-        {
-          name: "test-prompt",
-          slug: "test-prompt",
-        },
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        { name: "test-prompt", slug: "test-prompt" } as any,
       );
 
       const mockProjectMap = {
@@ -1504,3 +1491,126 @@ test("Eval with enableCache: true (default) uses span cache", async () => {
   expect(startSpy).toHaveBeenCalled();
   expect(stopSpy).toHaveBeenCalled();
 });
+
+// With only `classifiers` configured, each classification is stored under its
+// `name` in `classifications` as an item holding id, label and metadata.
+test("classifier-only evaluator populates classifications field", async () => {
+  const greetingItem = () => ({
+    id: "greeting",
+    label: "Greeting",
+    metadata: { source: "unit-test" },
+  });
+
+  const { results } = await Eval(
+    "test-classifier-only",
+    {
+      data: [{ input: "hello", expected: "greeting" }],
+      task: (input) => input,
+      classifiers: [
+        () => ({
+          name: "category",
+          ...greetingItem(),
+        }),
+      ],
+    },
+    { noSendLogs: true, returnResults: true },
+  );
+
+  expect(results).toHaveLength(1);
+  const [first] = results;
+  expect(first.classifications?.category).toEqual([greetingItem()]);
+});
+
+// With only `scores` configured, the result has scores and no classifications.
+test("scorer-only evaluator populates scores field", async () => {
+  const { results } = await Eval(
+    "test-scorer-only",
+    {
+      data: [{ input: "hello", expected: "hello" }],
+      task: (input) => input,
+      scores: [
+        ({ output, expected }) => ({
+          name: "exact_match",
+          score: output === expected ? 1 : 0,
+        }),
+      ],
+    },
+    { noSendLogs: true, returnResults: true },
+  );
+  expect(results).toHaveLength(1);
+  expect(results[0].scores.exact_match).toBe(1);
+  expect(results[0].classifications).toBeUndefined();
+});
+
+// Classifications that share a name accumulate into one list, in classifier
+// order, whether they come from a single classifier returning an array or
+// from separate classifiers.
+test("multiple classifiers returning the same name append items correctly", async () => {
+  const { results } = await Eval(
+    "test-classifier-append",
+    {
+      data: [{ input: "hello" }],
+      task: (input) => input,
+      classifiers: [
+        () => [
+          { name: "category", id: "greeting", label: "Greeting" },
+          { name: "category", id: "informal", label: "Informal" },
+        ],
+        () => ({ name: "category", id: "short", label: "Short" }),
+      ],
+    },
+    { noSendLogs: true, returnResults: true },
+  );
+
+  expect(results).toHaveLength(1);
+  expect(results[0].classifications?.category).toEqual([
+    { id: "greeting", label: "Greeting" },
+    { id: "informal", label: "Informal" },
+    { id: "short", label: "Short" },
+  ]);
+});
+
+// Scores and classifiers configured together both land on the same result.
+test("mixed evaluator populates both scores and classifications", async () => {
+  const { results } = await Eval(
+    "test-score-and-classify",
+    {
+      data: [{ input: "hello", expected: "hello" }],
+      task: (input) => input,
+      scores: [
+        ({ output, expected }) => ({
+          name: "exact_match",
+          score: output === expected ? 1 : 0,
+        }),
+      ],
+      classifiers: [
+        () => ({ name: "category", id: "greeting", label: "Greeting" }),
+      ],
+    },
+    { noSendLogs: true, returnResults: true },
+  );
+  expect(results).toHaveLength(1);
+  expect(results[0].scores.exact_match).toBe(1);
+  expect(results[0].classifications?.category).toEqual([
+    { id: "greeting", label: "Greeting" },
+  ]);
+});
+
+// A classifier returning `{}` is reported under metadata.classifier_errors.
+test("malformed classifier output fails clearly", async () => {
+  const { results } = await Eval(
+    "test-invalid-classifier-output",
+    {
+      data: [{ input: "hello" }],
+      task: (input) => input,
+      classifiers: [() => ({}) as never],
+    },
+    { noSendLogs: true, returnResults: true },
+  );
+  expect(results).toHaveLength(1);
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const errors = (results[0] as any).metadata?.classifier_errors;
+  expect(errors?.classifier_0).toMatch(
+    /must return classifications with a non-empty string name/,
+  );
+});
diff --git a/js/src/framework.ts b/js/src/framework.ts
index 637e688d9..c2bad700d 100644
--- a/js/src/framework.ts
+++ b/js/src/framework.ts
@@ -1,6 +1,8 @@
 import {
   makeScorerPropagatedEvent,
   mergeDicts,
+  Classification,
+  ClassificationItem,
   Score,
   SpanComponentsV3,
   SpanTypeAttribute,
@@ -186,6 +188,17 @@ export type EvalScorer<
   args: EvalScorerArgs<Input, Output, Expected, Metadata>,
 ) => OneOrMoreScores | Promise<OneOrMoreScores>;
 
+export type OneOrMoreClassifications = Classification | Classification[] | null;
+
+export type EvalClassifier<
+  Input,
+  Output,
+  Expected,
+  Metadata extends BaseMetadata = DefaultMetadataType,
+> = (
+  args: EvalScorerArgs<Input, Output, Expected, Metadata>,
+) => OneOrMoreClassifications | Promise<OneOrMoreClassifications>;
+
 export type EvalResult<
   Input,
   Output,
@@ -193,9 +206,10 @@ export type EvalResult<
   Metadata extends BaseMetadata = DefaultMetadataType,
 > = EvalCase<Input, Expected, Metadata> & {
   output: Output;
-  scores: Record<string, number | null>;
   error: unknown;
   origin?: ObjectReference;
+  scores: Record<string, number | null>;
+  classifications?: Record<string, ClassificationItem[]>;
 };
 
 type ErrorScoreHandler = (args: {
@@ -205,6 +219,10 @@ type ErrorScoreHandler = (args: {
   unhandledScores: string[];
 }) => Record<string, number> | undefined | void;
 
+/**
+ * Defines an evaluator. At least one of `scores` or `classifiers` must be provided;
+ * a runtime error is raised if neither is present.
+ */
 export interface Evaluator<
   Input,
   Output,
@@ -223,9 +241,17 @@ export interface Evaluator<
   task: EvalTask<Input, Output, Expected, Metadata, Parameters>;
 
   /**
-   * A set of functions that take an input, output, and expected value and return a score.
+   * A set of functions that take an input, output, and expected value and return a {@link Score}.
+   * At least one of `scores` or `classifiers` must be provided.
    */
-  scores: EvalScorer<Input, Output, Expected, Metadata>[];
+  scores?: EvalScorer<Input, Output, Expected, Metadata>[];
+
+  /**
+   * A set of functions that take an input, output, and expected value and return a
+   * {@link Classification}. Results are recorded under the `classifications` column.
+   * At least one of `scores` or `classifiers` must be provided.
+   */
+  classifiers?: EvalClassifier<Input, Output, Expected, Metadata>[];
 
   /**
    * A set of parameters that will be passed to the evaluator.
@@ -864,6 +890,132 @@ export function scorerName(
   return scorer.name || `scorer_${scorer_idx}`;
 }
 
+// Display name for a classifier: the function's own `name` when it has one,
+// otherwise a positional fallback of the form `classifier_<idx>`.
+const classifierName = (
+  classifier: EvalClassifier<any, any, any, any>,
+  classifier_idx: number,
+): string => classifier.name || `classifier_${classifier_idx}`;
+
+// Span-level metadata for one scorer/classifier call: a lone result's metadata
+// is used directly; several results are merged into one dict keyed by name.
+function buildSpanMetadata(
+  results: Array<{ name: string; metadata?: Record<string, unknown> }>,
+) {
+  if (results.length === 1) return results[0].metadata;
+  let acc = {};
+  for (const r of results) acc = mergeDicts(acc, { [r.name]: r.metadata });
+  return acc;
+}
+
+// Derives what a scorer span logs: `scoresRecord` maps each result name to
+// its score; `resultMetadata` is the span metadata for the same results.
+function buildSpanScores(
+  results: Array<{
+    name: string;
+    score: number | null;
+    metadata?: Record<string, unknown>;
+  }>,
+) {
+  let acc = {};
+  for (const r of results) acc = mergeDicts(acc, { [r.name]: r.score });
+  return { resultMetadata: buildSpanMetadata(results), scoresRecord: acc };
+}
+
+// Runs `fn` through `rootSpan.traced` as a span named `spanName` and never
+// rejects: resolves to a "score" outcome with fn's value, else an "error" one.
+async function runInScorerSpan<T>(
+  rootSpan: Span,
+  spanName: string,
+  spanType: SpanTypeAttribute,
+  propagatedEvent: ReturnType<typeof makeScorerPropagatedEvent>,
+  eventInput: unknown,
+  fn: (span: Span) => Promise<T[] | null>,
+): Promise<
+  { kind: "score"; value: T[] | null } | { kind: "error"; value: unknown }
+> {
+  try {
+    const produced = await rootSpan.traced(fn, {
+      name: spanName,
+      spanAttributes: { type: spanType, purpose: "scorer" },
+      propagatedEvent,
+      event: { input: eventInput },
+    });
+    return { kind: "score", value: produced };
+  } catch (thrown) {
+    return { kind: "error", value: thrown };
+  }
+}
+
+// Feeds every successful result to `onResult` and returns the failed runs,
+// each labelled with the entry of `names` at the same index.
+function collectScoringResults<T extends { name: string }>(
+  runResults: Array<
+    { kind: "score"; value: T[] | null } | { kind: "error"; value: unknown }
+  >,
+  names: string[],
+  onResult: (result: T) => void,
+): { name: string; error: unknown }[] {
+  return runResults.flatMap((run, i) => {
+    if (run.kind === "error") return [{ name: names[i], error: run.value }];
+    (run.value ?? []).forEach(onResult);
+    return [];
+  });
+}
+
+// Narrows one raw classifier return value to `Classification`; throws an Error
+// unless it is an object whose `name` and `id` are both non-empty strings.
+function validateClassificationResult(
+  value: unknown,
+  classifierLabel: string,
+): Classification {
+  if (typeof value !== "object" || value === null || isEmpty(value)) {
+    throw new Error(
+      `When returning structured classifier results, each classification must be a non-empty object. Got: ${JSON.stringify(value)}`,
+    );
+  }
+  const fields = value as Record<string, unknown>;
+  for (const key of ["name", "id"]) {
+    if (typeof fields[key] !== "string" || !fields[key]) {
+      throw new Error(
+        `Classifier ${classifierLabel} must return classifications with a non-empty string ${key}. Got: ${JSON.stringify(value)}`,
+      );
+    }
+  }
+  return value as Classification;
+}
+
+// Converts a classifier result into its stored item: `name` is dropped,
+// `label` falls back to `id`, and `metadata` is copied only when defined.
+function toClassificationItem(c: Classification): ClassificationItem {
+  const item: ClassificationItem = { id: c.id, label: c.label ?? c.id };
+  if (c.metadata !== undefined) item.metadata = c.metadata;
+  return item;
+}
+
+// Logs failures as `<kind>_errors` (metadata + root span); returns the names.
+function logScoringFailures(
+  kind: string,
+  failures: { name: string; error: unknown }[],
+  metadata: Record<string, unknown>,
+  rootSpan: Span,
+  state: BraintrustState | undefined,
+): string[] {
+  if (!failures.length) return [];
+  // `Error.stack` is optional, so fall back to the stringified error.
+  const asText = (e: unknown) => (e instanceof Error && e.stack) || `${e}`;
+  const errorMap = Object.fromEntries(
+    failures.map(({ name, error }) => [name, asText(error)]),
+  );
+  metadata[`${kind}_errors`] = errorMap;
+  rootSpan.log({ metadata: { [`${kind}_errors`]: errorMap } });
+  debugLogger.forState(state).warn(
+    `Found exceptions for the following ${kind}s: ${Object.keys(errorMap).join(", ")}`,
+    failures.map((f) => f.error),
+  );
+  return Object.keys(errorMap);
+}
+
 export async function runEvaluator(
   experiment: Experiment | null,
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -876,6 +1028,11 @@ export async function runEvaluator(
   enableCache = true,
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
 ): Promise<EvalResultWithSummary<any, any, any, any>> {
+  if (!evaluator.scores && !evaluator.classifiers) {
+    throw new Error(
+      "Evaluator must include at least one of `scores` or `classifiers`",
+    );
+  }
   return await runEvaluatorInternal(
     experiment,
     evaluator,
@@ -1089,7 +1246,11 @@ async function runEvaluatorInternal(
           let error: unknown | undefined = undefined;
           let tags: string[] = [...(datum.tags ?? [])];
           const scores: Record<string, number | null> = {};
-          const scorerNames = evaluator.scores.map(scorerName);
+          const classifications: Record<string, ClassificationItem[]> = {};
+          const scorerNames = (evaluator.scores ?? []).map(scorerName);
+          const classifierNames = (evaluator.classifiers ?? []).map(
+            classifierName,
+          );
           let unhandledScores: string[] | null = scorerNames;
           try {
             const meta = (o: Record<string, unknown>) =>
@@ -1154,139 +1315,156 @@ async function runEvaluatorInternal(
               output,
               trace,
             };
-            const scoreResults = await Promise.all(
-              evaluator.scores.map(async (score, score_idx) => {
-                try {
-                  const runScorer = async (span: Span) => {
-                    const scoreResult = score(scoringArgs);
-                    const scoreValue =
-                      scoreResult instanceof Promise
-                        ? await scoreResult
-                        : scoreResult;
-
-                    if (scoreValue === null) {
-                      return null;
-                    }
-
-                    if (Array.isArray(scoreValue)) {
-                      for (const s of scoreValue) {
-                        if (!(typeof s === "object" && !isEmpty(s))) {
-                          throw new Error(
-                            `When returning an array of scores, each score must be a non-empty object. Got: ${JSON.stringify(
-                              s,
-                            )}`,
-                          );
+            const { trace: _trace, ...scoringArgsForLogging } = scoringArgs;
+            const propagatedEvent = makeScorerPropagatedEvent(
+              await rootSpan.export(),
+            );
+
+            const getOtherFields = (s: Score) => {
+              const { metadata: _metadata, name: _name, ...rest } = s;
+              return rest;
+            };
+
+            const [scoreResults, classificationResults] = await Promise.all([
+              Promise.all(
+                (evaluator.scores ?? []).map((score, score_idx) =>
+                  runInScorerSpan(
+                    rootSpan,
+                    scorerNames[score_idx],
+                    SpanTypeAttribute.SCORE,
+                    propagatedEvent,
+                    scoringArgsForLogging,
+                    async (span) => {
+                      const scoreValue = await Promise.resolve(
+                        score(scoringArgs),
+                      );
+                      if (scoreValue === null) return null;
+                      if (Array.isArray(scoreValue)) {
+                        for (const s of scoreValue) {
+                          if (!(typeof s === "object" && !isEmpty(s))) {
+                            throw new Error(
+                              `When returning an array of scores, each score must be a non-empty object. Got: ${JSON.stringify(s)}`,
+                            );
+                          }
                         }
                       }
-                    }
-
-                    const results = Array.isArray(scoreValue)
-                      ? scoreValue
-                      : typeof scoreValue === "object" && !isEmpty(scoreValue)
-                        ? [scoreValue]
-                        : [
-                            {
-                              name: scorerNames[score_idx],
-                              score: scoreValue,
-                            },
-                          ];
-
-                    const getOtherFields = (s: Score) => {
-                      const { metadata: _metadata, name: _name, ...rest } = s;
-                      return rest;
-                    };
-
-                    const resultMetadata =
-                      results.length === 1
-                        ? results[0].metadata
-                        : results.reduce(
-                            (prev, s) =>
-                              mergeDicts(prev, {
-                                [s.name]: s.metadata,
-                              }),
-                            {},
-                          );
-
-                    const resultOutput =
-                      results.length === 1
-                        ? getOtherFields(results[0])
-                        : results.reduce(
-                            (prev, s) =>
-                              mergeDicts(prev, { [s.name]: getOtherFields(s) }),
-                            {},
-                          );
-
-                    const scores = results.reduce(
-                      (prev, s) => mergeDicts(prev, { [s.name]: s.score }),
-                      {},
-                    );
-
-                    span.log({
-                      output: resultOutput,
-                      metadata: resultMetadata,
-                      scores: scores,
-                    });
-                    return results;
-                  };
-
-                  // Exclude trace from logged input since it contains internal state
-                  // that shouldn't be serialized (spansFlushPromise, spansFlushed, etc.)
-                  const { trace: _trace, ...scoringArgsForLogging } =
-                    scoringArgs;
-                  const results = await rootSpan.traced(runScorer, {
-                    name: scorerNames[score_idx],
-                    spanAttributes: {
-                      type: SpanTypeAttribute.SCORE,
-                      purpose: "scorer",
+                      const results: Score[] = Array.isArray(scoreValue)
+                        ? scoreValue
+                        : typeof scoreValue === "object" && !isEmpty(scoreValue)
+                          ? [scoreValue]
+                          : [
+                              {
+                                name: scorerNames[score_idx],
+                                score: scoreValue,
+                              },
+                            ];
+                      const { resultMetadata, scoresRecord } =
+                        buildSpanScores(results);
+                      const resultOutput =
+                        results.length === 1
+                          ? getOtherFields(results[0])
+                          : results.reduce(
+                              (prev, s) =>
+                                mergeDicts(prev, {
+                                  [s.name]: getOtherFields(s),
+                                }),
+                              {},
+                            );
+                      span.log({
+                        output: resultOutput,
+                        metadata: resultMetadata,
+                        scores: scoresRecord,
+                      });
+                      return results;
+                    },
+                  ),
+                ),
+              ),
+              Promise.all(
+                (evaluator.classifiers ?? []).map((classifier, idx) =>
+                  runInScorerSpan(
+                    rootSpan,
+                    classifierNames[idx],
+                    SpanTypeAttribute.CLASSIFIER,
+                    propagatedEvent,
+                    scoringArgsForLogging,
+                    async (span) => {
+                      const classifierValue = await Promise.resolve(
+                        classifier(scoringArgs),
+                      );
+                      if (classifierValue === null) return null;
+                      const rawResults = (
+                        Array.isArray(classifierValue)
+                          ? classifierValue
+                          : [classifierValue]
+                      ).map((result) =>
+                        validateClassificationResult(
+                          result,
+                          classifierNames[idx],
+                        ),
+                      );
+                      const resultOutput =
+                        rawResults.length === 1
+                          ? toClassificationItem(rawResults[0])
+                          : rawResults.reduce(
+                              (prev, r) =>
+                                mergeDicts(prev, {
+                                  [r.name]: toClassificationItem(r),
+                                }),
+                              {},
+                            );
+                      span.log({
+                        output: resultOutput,
+                        metadata: buildSpanMetadata(rawResults),
+                      });
+                      return rawResults;
                     },
-                    propagatedEvent: makeScorerPropagatedEvent(
-                      await rootSpan.export(),
-                    ),
-                    event: { input: scoringArgsForLogging },
-                  });
-                  return { kind: "score", value: results } as const;
-                } catch (e) {
-                  return { kind: "error", value: e } as const;
+                  ),
+                ),
+              ),
+            ]);
+
+            const failingScorers = collectScoringResults(
+              scoreResults,
+              scorerNames,
+              (result) => {
+                scores[result.name] = result.score;
+              },
+            );
+
+            const failingClassifiers = collectScoringResults(
+              classificationResults,
+              classifierNames,
+              (result) => {
+                const item = toClassificationItem(result);
+                if (!classifications[result.name]) {
+                  classifications[result.name] = [];
                 }
-              }),
+                classifications[result.name].push(item);
+              },
             );
-            // Resolve each promise on its own so that we can separate the passing
-            // from the failing ones.
-            const failingScorersAndResults: { name: string; error: unknown }[] =
-              [];
-            scoreResults.forEach((results, i) => {
-              const name = scorerNames[i];
-              if (results.kind === "score") {
-                (results.value || []).forEach((result) => {
-                  scores[result.name] = result.score;
-                });
-              } else {
-                failingScorersAndResults.push({ name, error: results.value });
-              }
-            });
 
-            unhandledScores = null;
-            if (failingScorersAndResults.length) {
-              const scorerErrors = Object.fromEntries(
-                failingScorersAndResults.map(({ name, error }) => [
-                  name,
-                  error instanceof Error ? error.stack : `${error}`,
-                ]),
-              );
-              metadata["scorer_errors"] = scorerErrors;
-              rootSpan.log({
-                metadata: { scorer_errors: scorerErrors },
-              });
-              const names = Object.keys(scorerErrors).join(", ");
-              const errors = failingScorersAndResults.map((item) => item.error);
-              unhandledScores = Object.keys(scorerErrors);
-              debugLogger
-                .forState(evaluator.state)
-                .warn(
-                  `Found exceptions for the following scorers: ${names}`,
-                  errors,
-                );
+            if (Object.keys(classifications).length > 0) {
+              rootSpan.log({ classifications });
             }
+
+            const failedScorerNames = logScoringFailures(
+              "scorer",
+              failingScorers,
+              metadata,
+              rootSpan,
+              evaluator.state,
+            );
+            unhandledScores = failedScorerNames.length
+              ? failedScorerNames
+              : null;
+            logScoringFailures(
+              "classifier",
+              failingClassifiers,
+              metadata,
+              rootSpan,
+              evaluator.state,
+            );
           } catch (e) {
             logSpanError(rootSpan, e);
             error = e;
@@ -1310,15 +1488,21 @@ async function runEvaluatorInternal(
           }
 
           if (collectResults) {
-            collectedResults.push({
+            const baseResult = {
               input: datum.input,
               ...("expected" in datum ? { expected: datum.expected } : {}),
               output,
               tags: tags.length ? tags : undefined,
               metadata,
-              scores: mergedScores,
               error,
               origin: baseEvent.event?.origin,
+            };
+            collectedResults.push({
+              ...baseResult,
+              scores: mergedScores,
+              ...(Object.keys(classifications).length > 0
+                ? { classifications }
+                : {}),
             });
           }
         };
diff --git a/js/src/parameters.test.ts b/js/src/parameters.test.ts
index dbba8ea49..d5b7b7e4b 100644
--- a/js/src/parameters.test.ts
+++ b/js/src/parameters.test.ts
@@ -26,6 +26,7 @@ test("parameters are passed to task", async () => {
         return output;
       },
       scores: [],
+      classifiers: [],
       parameters: {
         prefix: z.string().default("start:"),
         suffix: z.string().default(":end"),
@@ -59,6 +60,7 @@ test("prompt parameter is passed correctly", async () => {
         return input;
       },
       scores: [],
+      classifiers: [],
       parameters: {
         main: {
           type: "prompt",
@@ -99,6 +101,7 @@ test("custom parameter values override defaults", async () => {
         return output;
       },
       scores: [],
+      classifiers: [],
       parameters: {
         prefix: z.string().default("start:"),
         suffix: z.string().default(":end"),
@@ -131,6 +134,7 @@ test("array parameter is handled correctly", async () => {
         return input;
       },
       scores: [],
+      classifiers: [],
       parameters: {
         items: z.array(z.string()).default(["item1", "item2"]),
       },
@@ -161,6 +165,7 @@ test("object parameter is handled correctly", async () => {
         return input;
       },
       scores: [],
+      classifiers: [],
       parameters: {
         config: z
           .object({
@@ -196,6 +201,7 @@ test("model parameter defaults to configured value", async () => {
         return input;
       },
       scores: [],
+      classifiers: [],
       parameters: {
         model: {
           type: "model",
@@ -224,6 +230,7 @@ test("model parameter is required when default is missing", async () => {
         data: [{ input: "test" }],
         task: async (input: string) => input,
         scores: [],
+        classifiers: [],
         parameters: {
           model: {
             type: "model",
diff --git a/js/util/index.ts b/js/util/index.ts
index 25a76cc03..52b082cc1 100644
--- a/js/util/index.ts
+++ b/js/util/index.ts
@@ -55,7 +55,13 @@ export {
   ensureNewDatasetRecord,
 } from "./object";
 
-export type { Score, Scorer, ScorerArgs } from "./score";
+export type {
+  Classification,
+  ClassificationItem,
+  Score,
+  Scorer,
+  ScorerArgs,
+} from "./score";
 
 export { constructJsonArray, deterministicReplacer } from "./json_util";
 
diff --git a/js/util/object.ts b/js/util/object.ts
index 735f52960..fea8735d6 100644
--- a/js/util/object.ts
+++ b/js/util/object.ts
@@ -21,6 +21,7 @@ export type OtherExperimentLogFields = {
   error: unknown;
   tags: string[];
   scores: Record<string, number | null>;
+  classifications?: Record<string, { id: string; label: string }[]>;
   metadata: Record<string, unknown>;
   metrics: Record<string, unknown>;
   datasetRecordId: string;
diff --git a/js/util/score.ts b/js/util/score.ts
index 758902344..08daebeef 100644
--- a/js/util/score.ts
+++ b/js/util/score.ts
@@ -1,3 +1,23 @@
+/**
+ * The result returned by a classifier function. Unlike a `Score`, a
+ * classification must carry an `id`, and its span is recorded as a classifier span.
+ */
+export interface Classification {
+  name: string;
+  id: string;
+  label?: string;
+  metadata?: Record<string, unknown>;
+}
+
+/**
+ * The serialized form of a classification, as stored in a log record's `classifications` field.
+ */
+export interface ClassificationItem {
+  id: string;
+  label: string;
+  metadata?: Record<string, unknown>;
+}
+
 export interface Score {
   name: string;
   score: number | null;