From ea146acb4e70817278318de007fe427ed0704f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Mon, 11 May 2026 09:44:53 -0700 Subject: [PATCH 01/11] feat(dataconverter): add compression, encryption, and S3 offload samples Add a new package with three production-ready custom implementations following the take-and-go layout: gzip compression, AES-256-GCM encryption, and an S3 / claim-check offload pattern (with a zero-config local-filesystem default and a commented AWS SDK v2 stub). One worker hosts all three on three task lists and prints per-sample stats banners on startup. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- README.md | 16 + .../samples/dataconverter/BlobStore.java | 36 ++ .../CompressedDataConverterWorkflow.java | 343 ++++++++++++++++++ .../CompressedJsonDataConverter.java | 92 +++++ .../dataconverter/CompressionStarter.java | 70 ++++ .../dataconverter/DataConverterConstants.java | 64 ++++ .../dataconverter/DataConverterSupport.java | 79 ++++ .../dataconverter/DataConverterWorker.java | 214 +++++++++++ .../EncryptedDataConverterWorkflow.java | 130 +++++++ .../EncryptedJsonDataConverter.java | 124 +++++++ .../dataconverter/EncryptionKeyLoader.java | 83 +++++ .../dataconverter/EncryptionStarter.java | 70 ++++ .../dataconverter/LocalFsBlobStore.java | 74 ++++ .../cadence/samples/dataconverter/README.md | 170 +++++++++ .../dataconverter/S3OffloadDataConverter.java | 222 ++++++++++++ .../S3OffloadDataConverterWorkflow.java | 153 ++++++++ .../dataconverter/S3OffloadStarter.java | 70 ++++ 17 files changed, 2010 insertions(+) create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java create mode 100644 
src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/README.md create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java create mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java diff --git a/README.md b/README.md index f3f76aaf..2ae81666 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ These samples demonstrate various capabilities of Java Cadence client and server * **Custom Workflow Controls** ([`com.uber.cadence.samples.query`](src/main/java/com/uber/cadence/samples/query/)) — workflow queries that return **markdown** for Cadence Web (Markdoc buttons that **signal** workflows or **start** new workflows). **Requires Cadence Web v4.0.14+.** Copy-paste run instructions: [query samples README](src/main/java/com/uber/cadence/samples/query/README.md). 
+* **DataConverter Samples** ([`com.uber.cadence.samples.dataconverter`](src/main/java/com/uber/cadence/samples/dataconverter/)) — three production-ready custom `DataConverter` patterns (gzip compression, AES-256-GCM encryption, and S3 / claim-check offload) that transparently transform every workflow input, output, and activity parameter. Copy-paste run instructions: [dataconverter samples README](src/main/java/com/uber/cadence/samples/dataconverter/README.md). + ## Get the Samples Run the following commands: @@ -139,6 +141,20 @@ Starters (pick one per run): In Cadence Web, open the workflow → **Query** tab → run query **`Signal`**, **`options`**, or **`dashboard`** (matching the starter you used). +### DataConverter Samples + +Three samples (compression, encryption, S3 offload) demonstrating custom `DataConverter` implementations. One worker hosts all three on three task lists. See [src/main/java/com/uber/cadence/samples/dataconverter/README.md](src/main/java/com/uber/cadence/samples/dataconverter/README.md) for full details, encryption-key configuration, and S3 swap instructions. 
+ +Worker (hosts all three samples; prints per-sample stats banners on startup): + + ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.DataConverterWorker + +Starters (pick one per run; each starts a new workflow execution and exits): + + ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.CompressionStarter + ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.EncryptionStarter + ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.S3OffloadStarter + ### Trip Booking Cadence implementation of the [Camunda BPMN trip booking example](https://github.com/berndruecker/trip-booking-saga-java) diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java b/src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java new file mode 100644 index 00000000..d4fdaa3a --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java @@ -0,0 +1,36 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import java.io.IOException; + +/** + * Abstraction over any external object store (local filesystem, S3, GCS, etc.). + * + *

{@link S3OffloadDataConverter} uses this interface to store large payloads outside Cadence
+ * history. The default implementation is {@link LocalFsBlobStore}, which writes to the system
+ * temporary directory and requires no external services.
+ */
+public interface BlobStore {
+
+  /**
+   * Stores {@code data} under {@code key}, overwriting any existing value.
+   *
+   * @param key identifier the payload can later be fetched with via {@link #get(String)}
+   * @param data raw bytes to persist
+   * @throws IOException if the backing store reports a failure while writing
+   */
+  void put(String key, byte[] data) throws IOException;
+
+  /**
+   * Returns the bytes previously stored under {@code key}.
+   *
+   * <p>NOTE(review): the behaviour for a key that was never stored is not specified by this
+   * interface; confirm against the concrete implementation before relying on it.
+   *
+   * @param key identifier that was passed to {@link #put(String, byte[])}
+   * @return the stored bytes
+   * @throws IOException if the backing store reports a failure while reading
+   */
+  byte[] get(String key) throws IOException;
+}
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java
new file mode 100644
index 00000000..5e699581
--- /dev/null
+++ b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java
@@ -0,0 +1,343 @@
+/*
+ * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ * use this file except in compliance with the License. A copy of the License is
+ * located at
+ *
+ * http://aws.amazon.com/apache2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.activity.ActivityMethod; +import com.uber.cadence.activity.ActivityOptions; +import com.uber.cadence.workflow.Workflow; +import com.uber.cadence.workflow.WorkflowMethod; +import java.time.Duration; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Demonstrates gzip-over-JSON compression as a Cadence {@code DataConverter}. The workflow itself + * is unchanged from a plain Cadence workflow — the compression is applied transparently to every + * input, output, and activity parameter by {@link CompressedJsonDataConverter}, which is wired in + * at the worker by {@link DataConverterWorker}. + * + *

The workflow takes no inputs and builds its own large payload internally so it can be
+ * started from the Cadence CLI without bundling a custom converter into the caller.
+ */
+public final class CompressedDataConverterWorkflow {
+
+  private CompressedDataConverterWorkflow() {}
+
+  // ---------------- POJOs ----------------
+
+  /**
+   * A complex data structure with nested objects and arrays designed to demonstrate compression
+   * benefits. Fields are public, collections are fully parameterized, and every class has a
+   * no-arg constructor so the JSON data converter can serialize and deserialize them.
+   */
+  public static final class LargePayload {
+    public String id;
+    public String name;
+    public String description;
+    public Map<String, String> metadata;
+    public List<Item> items;
+    public Config config;
+    public List<HistoryEntry> history;
+    public List<String> tags;
+    public Statistics statistics;
+
+    public LargePayload() {}
+  }
+
+  /** One catalog entry, including its reviews and inventory record. */
+  public static final class Item {
+    public String itemId;
+    public String title;
+    public String description;
+    public double price;
+    public List<String> categories;
+    public Map<String, String> attributes;
+    public List<Review> reviews;
+    public Inventory inventory;
+
+    public Item() {}
+  }
+
+  /** A single customer review attached to an {@link Item}. */
+  public static final class Review {
+    public String reviewId;
+    public String userId;
+    public int rating;
+    public String comment;
+    public int helpfulVotes;
+    public int notHelpfulVotes;
+    public String date;
+    public boolean verifiedPurchase;
+    public double score;
+
+    public Review() {}
+  }
+
+  /** Stock information for an {@link Item}. */
+  public static final class Inventory {
+    public int quantity;
+    public String location;
+    public String lastUpdated;
+    public String status;
+
+    public Inventory() {}
+  }
+
+  /** Catalog-level configuration block. */
+  public static final class Config {
+    public String version;
+    public String environment;
+    public Map<String, String> settings;
+    public List<String> features;
+    public Limits limits;
+
+    public Config() {}
+  }
+
+  /** Numeric limits nested inside {@link Config}. */
+  public static final class Limits {
+    public int maxItems;
+    public int maxRequestsPerMinute;
+    public int maxFileSizeMb;
+    public int maxConcurrentUsers;
+    public int timeoutSeconds;
+
+    public Limits() {}
+  }
+
+  /** One entry of the payload's event history. */
+  public static final class HistoryEntry {
+    public String eventId;
+    public String timestamp;
+    public String eventType;
+    public String userId;
+    public Map<String, String> details;
+    public String severity;
+
+    public HistoryEntry() {}
+  }
+
+  /** Aggregate figures for the whole payload. */
+  public static final class Statistics {
+    public int totalItems;
+    public int totalUsers;
+    public double averageRating;
+    public double totalRevenue;
+    public int activeOrders;
+    public double completionRate;
+
+    public Statistics() {}
+  }
+
+  // ---------------- Sample payload generator ----------------
+
+  /**
+   * Builds a sample large payload with realistic-looking, repetitive data so gzip has plenty to
+   * compress.
+   *
+   * @return a fully populated payload: 30 metadata entries, 100 items (each with 20 attributes
+   *     and 25 reviews), one config block, 50 history entries, 8 tags and a statistics record
+   */
+  public static LargePayload createLargePayload() {
+    LargePayload p = new LargePayload();
+    p.id = "large_payload_001";
+    p.name = "Comprehensive Product Catalog";
+    p.description = repeat(
+        "This is a comprehensive product catalog containing thousands of items with detailed descriptions, specifications, and user reviews. Each item includes pricing information, inventory status, and customer feedback. The catalog is designed to provide complete information for customers making purchasing decisions. ",
+        50);
+
+    p.metadata = new LinkedHashMap<>();
+    for (int i = 0; i < 30; i++) {
+      p.metadata.put(
+          "meta_key_" + i,
+          repeat(
+              "This is comprehensive metadata information with detailed descriptions and specifications. ",
+              5));
+    }
+
+    p.items = new ArrayList<>(100);
+    for (int i = 0; i < 100; i++) {
+      Item it = new Item();
+      it.itemId = "item_" + i;
+      it.title = "High-Quality Product " + i + " with Advanced Features";
+      it.description = repeat(
+          "This is a premium product with exceptional quality and advanced features designed for professional use. It includes comprehensive documentation and support. ",
+          10);
+      it.price = 100.0 + i * 10 + (i % 100) / 100.0;
+      it.categories = new ArrayList<>();
+      it.categories.add("Electronics");
+      it.categories.add("Professional");
+      it.categories.add("Premium");
+      it.categories.add("Advanced");
+
+      it.attributes = new LinkedHashMap<>();
+      for (int k = 0; k < 20; k++) {
+        it.attributes.put(
+            "attr_" + k,
+            repeat(
+                "This is a detailed attribute description with comprehensive information about the product specification. ",
+                2));
+      }
+
+      it.reviews = new ArrayList<>(25);
+      for (int j = 0; j < 25; j++) {
+        Review r = new Review();
+        r.reviewId = "review_" + i + "_" + j;
+        r.userId = "user_" + j;
+        r.rating = 1 + (j % 5);
+        r.comment = repeat(
+            "This is a detailed customer review with comprehensive feedback about the product quality, delivery experience, and overall satisfaction. The customer provides specific details about their experience. ",
+            3);
+        r.helpfulVotes = j * 2;
+        r.notHelpfulVotes = j;
+        r.date = "2024-01-15T10:30:00Z";
+        r.verifiedPurchase = j % 2 == 0;
+        r.score = (1 + (j % 5)) + (j % 10) / 10.0;
+        it.reviews.add(r);
+      }
+
+      Inventory inv = new Inventory();
+      inv.quantity = 100 + i;
+      inv.location = "Warehouse " + (i % 5);
+      inv.lastUpdated = "2024-01-15T10:30:00Z";
+      inv.status = "In Stock";
+      it.inventory = inv;
+      p.items.add(it);
+    }
+
+    Config cfg = new Config();
+    cfg.version = "2.1.0";
+    cfg.environment = "production";
+    cfg.settings = new LinkedHashMap<>();
+    cfg.settings.put("cache_enabled", "true");
+    cfg.settings.put("compression_level", "high");
+    cfg.settings.put("timeout", "30s");
+    cfg.settings.put("max_connections", "1000");
+    cfg.settings.put("retry_attempts", "3");
+    cfg.features = new ArrayList<>();
+    cfg.features.add("advanced_search");
+    cfg.features.add("real_time_updates");
+    cfg.features.add("analytics");
+    cfg.features.add("reporting");
+    cfg.features.add("integration");
+    Limits lim = new Limits();
+    lim.maxItems = 10000;
+    lim.maxRequestsPerMinute = 1000;
+    lim.maxFileSizeMb = 100;
+    lim.maxConcurrentUsers = 5000;
+    lim.timeoutSeconds = 30;
+    cfg.limits = lim;
+    p.config = cfg;
+
+    p.history = new ArrayList<>(50);
+    for (int i = 0; i < 50; i++) {
+      HistoryEntry h = new HistoryEntry();
+      h.eventId = "event_" + i;
+      h.timestamp = "2024-01-15T10:30:00Z";
+      h.eventType = "system_update";
+      h.userId = "admin_" + (i % 5);
+      h.details = new LinkedHashMap<>();
+      for (int j = 0; j < 10; j++) {
+        h.details.put(
+            "detail_" + j,
+            repeat(
+                "This is a detailed event description with comprehensive information about the system event and its impact. ",
+                2));
+      }
+      h.severity = "medium";
+      p.history.add(h);
+    }
+
+    p.tags = new ArrayList<>();
+    p.tags.add("catalog");
+    p.tags.add("products");
+    p.tags.add("inventory");
+    p.tags.add("analytics");
+    p.tags.add("reporting");
+    p.tags.add("integration");
+    p.tags.add("api");
+    p.tags.add("dashboard");
+
+    Statistics stats = new Statistics();
+    stats.totalItems = 10000;
+    stats.totalUsers = 5000;
+    stats.averageRating = 4.2;
+    stats.totalRevenue = 1250000.50;
+    stats.activeOrders = 250;
+    stats.completionRate = 98.5;
+    p.statistics = stats;
+
+    return p;
+  }
+
+  /** Returns {@code s} concatenated {@code n} times (empty string when {@code n <= 0}). */
+  private static String repeat(String s, int n) {
+    StringBuilder sb = new StringBuilder(s.length() * n);
+    for (int i = 0; i < n; i++) {
+      sb.append(s);
+    }
+    return sb.toString();
+  }
+
+  // ---------------- Workflow + activity ----------------
+
+  /** Workflow contract: no arguments in, the processed payload out. */
+  public interface WorkflowIface {
+
+    @WorkflowMethod(
+        name = DataConverterConstants.COMPRESSION_WORKFLOW_TYPE,
+        executionStartToCloseTimeoutSeconds = 60,
+        taskList = DataConverterConstants.TASK_LIST_COMPRESSION)
+    LargePayload run();
+  }
+
+  /** Activity contract; the payload crosses the activity boundary in both directions. */
+  public interface Activities {
+
+    @ActivityMethod(scheduleToCloseTimeoutSeconds = 60)
+    LargePayload processLargePayload(LargePayload input);
+  }
+
+  /** Builds the sample payload, sends it through the activity and returns the result. */
+  public static final class WorkflowImpl implements WorkflowIface {
+
+    private final Activities activities =
+        Workflow.newActivityStub(
+            Activities.class,
+            new ActivityOptions.Builder()
+                .setScheduleToStartTimeout(Duration.ofMinutes(1))
+                .setStartToCloseTimeout(Duration.ofMinutes(1))
+                .build());
+
+    @Override
+    public LargePayload run() {
+      LargePayload input = createLargePayload();
+
+      Workflow.getLogger(CompressedDataConverterWorkflow.class)
+          .info("Large payload workflow started: id={}, items={}", input.id, input.items.size());
+
+      LargePayload result = activities.processLargePayload(input);
+
+      Workflow.getLogger(CompressedDataConverterWorkflow.class)
+          .info(
+              "Large payload workflow completed: id={}. All data was automatically gzip-compressed in Cadence history.",
+              result.id);
+      return result;
+    }
+  }
+
+  /** Marks the payload as processed and refreshes its item count. */
+  public static final class ActivitiesImpl implements Activities {
+
+    @Override
+    public LargePayload processLargePayload(LargePayload input) {
+      input.name = input.name + " (Processed)";
+      if (input.statistics != null) {
+        // Recompute from the actual list; a missing list counts as zero items.
+        input.statistics.totalItems = input.items != null ? input.items.size() : 0;
+      }
+      return input;
+    }
+  }
+}
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java
new file mode 100644
index 00000000..e3cbd068
--- /dev/null
+++ b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ * use this file except in compliance with the License. A copy of the License is
+ * located at
+ *
+ * http://aws.amazon.com/apache2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.DataConverterException; +import com.uber.cadence.converter.JsonDataConverter; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.lang.reflect.Type; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +/** + * {@link DataConverter} that serializes values to JSON via {@link JsonDataConverter} and then + * compresses the resulting bytes with gzip. + * + *

For repetitive JSON payloads this typically achieves 60-80% size reduction, lowering storage
+ * cost and bandwidth without changing any workflow or activity code. Apply by setting it on the
+ * {@code WorkflowClientOptions} used by both the worker and any client that triggers the workflow.
+ */
+public final class CompressedJsonDataConverter implements DataConverter {
+
+  private static final DataConverter delegate = JsonDataConverter.getInstance();
+
+  /**
+   * Serializes {@code values} to JSON through the delegate and gzips the result.
+   *
+   * @return {@code null} when no values are given; the delegate's output unchanged when it is
+   *     {@code null} or empty; otherwise the gzip-compressed JSON bytes
+   * @throws DataConverterException if the delegate fails or the gzip stream cannot be written
+   */
+  @Override
+  public byte[] toData(Object... values) throws DataConverterException {
+    if (values == null || values.length == 0) {
+      return null;
+    }
+    byte[] jsonBytes = delegate.toData(values);
+    if (jsonBytes == null || jsonBytes.length == 0) {
+      return jsonBytes;
+    }
+    try {
+      ByteArrayOutputStream out = new ByteArrayOutputStream();
+      // Closing the gzip stream writes the trailer, so it must happen before toByteArray().
+      try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
+        gzip.write(jsonBytes);
+      }
+      return out.toByteArray();
+    } catch (IOException e) {
+      throw new DataConverterException("Failed to gzip-compress JSON payload", e);
+    }
+  }
+
+  /**
+   * Gunzips {@code content} when it carries the gzip magic header and hands the JSON to the
+   * delegate; {@code null}, empty, or non-gzip content is passed to the delegate unchanged.
+   */
+  @Override
+  public <T> T fromData(byte[] content, Class<T> valueClass, Type valueType)
+      throws DataConverterException {
+    return delegate.fromData(decompress(content), valueClass, valueType);
+  }
+
+  /** Array counterpart of {@link #fromData(byte[], Class, Type)} with the same pass-through rule. */
+  @Override
+  public Object[] fromDataArray(byte[] content, Type... valueTypes) throws DataConverterException {
+    return delegate.fromDataArray(decompress(content), valueTypes);
+  }
+
+  /** Returns true when {@code content} starts with the two-byte gzip magic number (0x1f 0x8b). */
+  private static boolean isGzip(byte[] content) {
+    return content != null
+        && content.length >= 2
+        && (content[0] & 0xff) == (GZIPInputStream.GZIP_MAGIC & 0xff)
+        && (content[1] & 0xff) == (GZIPInputStream.GZIP_MAGIC >> 8);
+  }
+
+  /**
+   * Gunzips {@code content}. Input without the gzip magic header (including {@code null} and
+   * empty arrays) is returned as is, so payloads that were written as plain JSON, for example
+   * arguments supplied from the Cadence CLI, still decode instead of failing in the gzip reader.
+   *
+   * @throws DataConverterException if the content has a gzip header but cannot be inflated
+   */
+  private static byte[] decompress(byte[] content) throws DataConverterException {
+    if (!isGzip(content)) {
+      return content;
+    }
+    try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(content));
+        ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+      byte[] buf = new byte[4096];
+      int read;
+      while ((read = gzip.read(buf)) != -1) {
+        out.write(buf, 0, read);
+      }
+      return out.toByteArray();
+    } catch (IOException e) {
+      throw new DataConverterException("Failed to gunzip payload", e);
+    }
+  }
+}
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java b/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java
new file mode 100644
index 00000000..fad3e3de
--- /dev/null
+++ b/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ * use this file except in compliance with the License. A copy of the License is
+ * located at
+ *
+ * http://aws.amazon.com/apache2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.uber.cadence.samples.dataconverter;
+
+import com.uber.cadence.client.WorkflowClient;
+import com.uber.cadence.client.WorkflowOptions;
+import java.time.Duration;
+import java.util.UUID;
+
+/**
+ * Starts {@link CompressedDataConverterWorkflow} (async, fire-and-forget).
+ *
+ *

The workflow takes no inputs and generates its own payload, so this starter does not need to + * use the matching {@link CompressedJsonDataConverter}. The same effect can be achieved from the + * Cadence CLI via: + * + *

+ * cadence --domain samples-domain \
+ *   workflow start \
+ *   --workflow_type CompressionDataConverterWorkflow \
+ *   --tl data-compression \
+ *   --et 60
+ * 
+ *
+ */
+public final class CompressionStarter {
+
+  private CompressionStarter() {}
+
+  /**
+   * Starts one workflow execution on the compression task list and exits without waiting for
+   * the result. The generated workflow id is printed so the execution can be found afterwards.
+   *
+   * <p>Exit code 0 on success, 1 when the sample domain is missing (after printing a hint); any
+   * other {@link RuntimeException} is rethrown.
+   */
+  public static void main(String[] args) {
+    try {
+      WorkflowClient client = DataConverterSupport.newWorkflowClient();
+      // Hoisted so the same id can be both registered and reported to the user.
+      String workflowId = "compression-" + UUID.randomUUID();
+      WorkflowOptions options =
+          new WorkflowOptions.Builder()
+              .setTaskList(DataConverterConstants.TASK_LIST_COMPRESSION)
+              .setExecutionStartToCloseTimeout(Duration.ofMinutes(1))
+              .setWorkflowId(workflowId)
+              .build();
+
+      CompressedDataConverterWorkflow.WorkflowIface workflow =
+          client.newWorkflowStub(CompressedDataConverterWorkflow.WorkflowIface.class, options);
+
+      WorkflowClient.start(workflow::run);
+      System.out.println(
+          "Started CompressedDataConverterWorkflow on task list \""
+              + DataConverterConstants.TASK_LIST_COMPRESSION
+              + "\" (workflowId="
+              + workflowId
+              + ").");
+      System.exit(0);
+    } catch (RuntimeException e) {
+      if (DataConverterSupport.printHintIfDomainMissing(e)) {
+        System.exit(1);
+      }
+      throw e;
+    }
+  }
+}
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java
new file mode 100644
index 00000000..7b798485
--- /dev/null
+++ b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ * use this file except in compliance with the License. A copy of the License is
+ * located at
+ *
+ * http://aws.amazon.com/apache2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.samples.common.SampleConstants; + +/** + * Shared identifiers for the DataConverter samples. + * + *

Each of the three samples runs on its own task list so it can have its own
+ * {@code DataConverter}. {@code DataConverter} is bound to a {@code WorkflowClient}, and each task
+ * list maps to one worker built from one client; that is why one process needs three clients to
+ * host all three samples.
+ */
+public final class DataConverterConstants {
+
+  private DataConverterConstants() {}
+
+  /** Cadence domain shared with the rest of the samples (registered via {@code RegisterDomain}). */
+  public static final String DOMAIN = SampleConstants.DOMAIN;
+
+  /** Task list for the gzip-compression sample worker. */
+  public static final String TASK_LIST_COMPRESSION = "data-compression";
+
+  /** Task list for the AES-256-GCM encryption sample worker. */
+  public static final String TASK_LIST_ENCRYPTION = "data-encryption";
+
+  /** Task list for the S3 / claim-check offload sample worker. */
+  public static final String TASK_LIST_S3 = "data-s3";
+
+  /**
+   * Registered workflow type for {@code CompressedDataConverterWorkflow}. Note the type name
+   * starts with "Compression", not "Compressed" like the Java class.
+   */
+  public static final String COMPRESSION_WORKFLOW_TYPE = "CompressionDataConverterWorkflow";
+
+  /**
+   * Registered workflow type for {@code EncryptedDataConverterWorkflow}. Note the type name
+   * starts with "Encryption", not "Encrypted" like the Java class.
+   */
+  public static final String ENCRYPTION_WORKFLOW_TYPE = "EncryptionDataConverterWorkflow";
+
+  /** Registered workflow type for {@code S3OffloadDataConverterWorkflow}. */
+  public static final String S3_OFFLOAD_WORKFLOW_TYPE = "S3OffloadDataConverterWorkflow";
+
+  /**
+   * Logical bucket / prefix embedded in S3-offload reference keys. It currently has the same
+   * literal value as {@link #TASK_LIST_S3}, but the two constants are declared independently.
+   */
+  public static final String S3_BUCKET = "data-s3";
+
+  /**
+   * Size threshold in bytes: payloads larger than this are offloaded to the BlobStore by
+   * {@link S3OffloadDataConverter}. Cadence's default max payload is roughly 2 MB; the threshold
+   * is set intentionally low so the demo workflow comfortably triggers offloading.
+   */
+  public static final int S3_DEFAULT_THRESHOLD_BYTES = 4096;
+}
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java
new file mode 100644
index 00000000..1f072fe4
--- /dev/null
+++ b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ * use this file except in compliance with the License. A copy of the License is
+ * located at
+ *
+ * http://aws.amazon.com/apache2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.uber.cadence.samples.dataconverter;
+
+import com.uber.cadence.client.WorkflowClient;
+import com.uber.cadence.client.WorkflowClientOptions;
+import com.uber.cadence.converter.DataConverter;
+import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter;
+import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs;
+
+/** Shared client factory and friendly errors for the DataConverter sample starters and worker. */
+final class DataConverterSupport {
+
+  private DataConverterSupport() {}
+
+  /**
+   * Builds a WorkflowClient with the given DataConverter on the configured domain. The Worker
+   * derived from this client will use the same converter for all serialization.
+   */
+  static WorkflowClient newWorkflowClient(DataConverter dataConverter) {
+    WorkflowClientOptions.Builder options = WorkflowClientOptions.newBuilder();
+    options.setDomain(DataConverterConstants.DOMAIN);
+    // A null converter means "leave the client's default converter in place".
+    if (dataConverter != null) {
+      options.setDataConverter(dataConverter);
+    }
+    Thrift2ProtoAdapter service = new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance());
+    return WorkflowClient.newInstance(service, options.build());
+  }
+
+  /** Convenience overload: a WorkflowClient that keeps the default JSON DataConverter. */
+  static WorkflowClient newWorkflowClient() {
+    return newWorkflowClient(null);
+  }
+
+  /**
+   * Walks {@code t} and its cause chain looking for a message that mentions both "Domain" and
+   * "does not exist"; on the first match a copy-paste registration hint is written to stderr.
+   *
+   * @param t the failure to inspect; may be {@code null}
+   * @return true if a missing-domain message was found and the hint was printed (caller should
+   *     exit), false otherwise
+   */
+  static boolean printHintIfDomainMissing(Throwable t) {
+    Throwable current = t;
+    while (current != null) {
+      if (mentionsMissingDomain(current.getMessage())) {
+        printRegisterDomainHint();
+        return true;
+      }
+      current = current.getCause();
+    }
+    return false;
+  }
+
+  /** True when {@code message} contains both marker fragments of a missing-domain error. */
+  private static boolean mentionsMissingDomain(String message) {
+    if (message == null) {
+      return false;
+    }
+    return message.contains("Domain") && message.contains("does not exist");
+  }
+
+  /** Prints the two ways of registering the sample domain (Gradle task or Cadence CLI). */
+  private static void printRegisterDomainHint() {
+    String domain = DataConverterConstants.DOMAIN;
+    System.err.println();
+    System.err.println("Cadence reported that the domain \"" + domain + "\" does not exist.");
+    System.err.println("Register it once against your cluster, then run this again:");
+    System.err.println();
+    System.err.println(
+        " ./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain");
+    System.err.println();
+    System.err.println("Or with Cadence CLI:");
+    System.err.println(" cadence --domain " + domain + " domain register");
+    System.err.println();
+  }
+}
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java
new file mode 100644
index 00000000..b527cb03
--- /dev/null
+++ b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2012-2016 Amazon.com, Inc. or its affiliates.
All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.JsonDataConverter; +import com.uber.cadence.worker.Worker; +import com.uber.cadence.worker.WorkerFactory; + +/** + * Hosts all three DataConverter sample workers in a single process. Each sample uses its own + * {@link WorkflowClient} (and therefore its own {@link WorkerFactory}) because the + * {@code DataConverter} is bound to {@code WorkflowClientOptions}. + * + *

On startup the worker prints a stats banner per sample showing the visible benefit of each + * pattern (compression ratio, ciphertext preview, claim-check size), then begins polling all three + * task lists in the background. + */ +public final class DataConverterWorker { + + private DataConverterWorker() {} + + public static void main(String[] args) { + DataConverter compressionConverter = new CompressedJsonDataConverter(); + DataConverter encryptionConverter = new EncryptedJsonDataConverter(EncryptionKeyLoader.loadEncryptionKey()); + LocalFsBlobStore blobStore = new LocalFsBlobStore(); + DataConverter s3Converter = + new S3OffloadDataConverter( + blobStore, + DataConverterConstants.S3_BUCKET, + DataConverterConstants.S3_DEFAULT_THRESHOLD_BYTES); + + WorkerFactory compressionFactory = startCompressionWorker(compressionConverter); + WorkerFactory encryptionFactory = startEncryptionWorker(encryptionConverter); + WorkerFactory s3Factory = startS3OffloadWorker(s3Converter); + + printCompressionStats(compressionConverter); + printEncryptionStats(encryptionConverter); + printS3OffloadStats(blobStore); + + System.out.println( + "DataConverterWorker listening on \"" + + DataConverterConstants.TASK_LIST_COMPRESSION + + "\", \"" + + DataConverterConstants.TASK_LIST_ENCRYPTION + + "\", \"" + + DataConverterConstants.TASK_LIST_S3 + + "\" (domain \"" + + DataConverterConstants.DOMAIN + + "\")."); + + // Keep references so the factories aren't GC'd while the process runs. 
+ Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + compressionFactory.shutdown(); + encryptionFactory.shutdown(); + s3Factory.shutdown(); + })); + } + + private static WorkerFactory startCompressionWorker(DataConverter converter) { + WorkflowClient client = DataConverterSupport.newWorkflowClient(converter); + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker(DataConverterConstants.TASK_LIST_COMPRESSION); + worker.registerWorkflowImplementationTypes(CompressedDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new CompressedDataConverterWorkflow.ActivitiesImpl()); + factory.start(); + return factory; + } + + private static WorkerFactory startEncryptionWorker(DataConverter converter) { + WorkflowClient client = DataConverterSupport.newWorkflowClient(converter); + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker(DataConverterConstants.TASK_LIST_ENCRYPTION); + worker.registerWorkflowImplementationTypes(EncryptedDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new EncryptedDataConverterWorkflow.ActivitiesImpl()); + factory.start(); + return factory; + } + + private static WorkerFactory startS3OffloadWorker(DataConverter converter) { + WorkflowClient client = DataConverterSupport.newWorkflowClient(converter); + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker(DataConverterConstants.TASK_LIST_S3); + worker.registerWorkflowImplementationTypes(S3OffloadDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new S3OffloadDataConverterWorkflow.ActivitiesImpl()); + factory.start(); + return factory; + } + + // ---------------- Stats banners ---------------- + + private static void printCompressionStats(DataConverter converter) { + CompressedDataConverterWorkflow.LargePayload payload = + 
CompressedDataConverterWorkflow.createLargePayload(); + byte[] originalJson = JsonDataConverter.getInstance().toData(payload); + byte[] compressed = converter.toData(payload); + int originalSize = originalJson == null ? 0 : originalJson.length; + int compressedSize = compressed == null ? 0 : compressed.length; + double pct = originalSize == 0 ? 0.0 : (1.0 - (double) compressedSize / originalSize) * 100.0; + + System.out.println(); + System.out.println("=== Compression Sample Statistics ==="); + System.out.printf("Original JSON size: %d bytes (%.2f KB)%n", originalSize, originalSize / 1024.0); + System.out.printf("Compressed size: %d bytes (%.2f KB)%n", compressedSize, compressedSize / 1024.0); + System.out.printf("Compression ratio: %.2f%% reduction%n", pct); + System.out.printf( + "Space saved: %d bytes (%.2f KB)%n", + originalSize - compressedSize, (originalSize - compressedSize) / 1024.0); + System.out.printf( + "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", + DataConverterConstants.DOMAIN, + DataConverterConstants.TASK_LIST_COMPRESSION, + DataConverterConstants.COMPRESSION_WORKFLOW_TYPE); + System.out.println("====================================="); + System.out.println(); + } + + private static void printEncryptionStats(DataConverter converter) { + EncryptedDataConverterWorkflow.SensitiveCustomerRecord record = + EncryptedDataConverterWorkflow.createSensitiveCustomerRecord(); + byte[] plaintext = JsonDataConverter.getInstance().toData(record); + byte[] ciphertext = converter.toData(record); + int plaintextSize = plaintext == null ? 0 : plaintext.length; + int ciphertextSize = ciphertext == null ? 0 : ciphertext.length; + String preview = ciphertext == null ? 
"" : hexPreview(ciphertext, 40); + + System.out.println(); + System.out.println("=== Encryption Sample Statistics ==="); + System.out.printf("Plaintext JSON size: %d bytes%n", plaintextSize); + System.out.printf( + "Ciphertext size: %d bytes (overhead: %d bytes nonce+tag)%n", + ciphertextSize, ciphertextSize - plaintextSize); + System.out.printf("Ciphertext preview: %s%n", preview); + System.out.printf( + "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", + DataConverterConstants.DOMAIN, + DataConverterConstants.TASK_LIST_ENCRYPTION, + DataConverterConstants.ENCRYPTION_WORKFLOW_TYPE); + System.out.println("===================================="); + System.out.println(); + } + + private static void printS3OffloadStats(LocalFsBlobStore store) { + S3OffloadDataConverterWorkflow.S3LargePayload payload = + S3OffloadDataConverterWorkflow.createS3LargePayload(); + byte[] jsonBytes = JsonDataConverter.getInstance().toData(payload); + int jsonSize = jsonBytes == null ? 0 : jsonBytes.length; + // History footprint = 1 prefix byte + JSON envelope {"__s3_ref":"/"}. + // SHA-256 hex digest is 64 chars; bucket + "/" + 64 hex chars. + int cadenceBytes = + 1 + + ("{\"__s3_ref\":\"" + + DataConverterConstants.S3_BUCKET + + "/" + + repeatChar('a', 64) + + "\"}") + .length(); + + System.out.println(); + System.out.println("=== S3 Offload Sample Statistics ==="); + System.out.printf("Full payload JSON size: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); + System.out.printf("Stored in BlobStore: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); + System.out.printf( + "Stored in Cadence history: %d bytes (claim-check reference only)%n", cadenceBytes); + System.out.printf( + "Reduction in Cadence: %.1f%%%n", + jsonSize == 0 ? 
0.0 : 100.0 * (1.0 - (double) cadenceBytes / jsonSize)); + System.out.printf("BlobStore location: %s%n", store.baseDir()); + System.out.printf( + "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", + DataConverterConstants.DOMAIN, + DataConverterConstants.TASK_LIST_S3, + DataConverterConstants.S3_OFFLOAD_WORKFLOW_TYPE); + System.out.println("====================================="); + System.out.println(); + } + + private static String hexPreview(byte[] data, int byteLimit) { + int len = Math.min(byteLimit, data.length); + StringBuilder sb = new StringBuilder(len * 2 + 3); + for (int i = 0; i < len; i++) { + sb.append(String.format("%02x", data[i] & 0xff)); + } + if (data.length > byteLimit) { + sb.append("..."); + } + return sb.toString(); + } + + private static String repeatChar(char c, int n) { + char[] buf = new char[n]; + java.util.Arrays.fill(buf, c); + return new String(buf); + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java new file mode 100644 index 00000000..142e3865 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java @@ -0,0 +1,130 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.activity.ActivityMethod; +import com.uber.cadence.activity.ActivityOptions; +import com.uber.cadence.workflow.Workflow; +import com.uber.cadence.workflow.WorkflowMethod; +import java.time.Duration; + +/** + * Demonstrates AES-256-GCM encryption as a Cadence {@code DataConverter}. Every workflow input, + * output, and activity parameter is encrypted before being written to Cadence history. Without + * the key, the data is opaque to anyone browsing workflow history — including Cadence operators. + * + *

The workflow takes no inputs and builds its own sensitive payload internally so it can be + * started from the Cadence CLI without bundling the encryption key into the caller. + */ +public final class EncryptedDataConverterWorkflow { + + private EncryptedDataConverterWorkflow() {} + + // ---------------- POJOs ---------------- + + /** PII / PHI-style record that must be encrypted in workflow history. */ + public static final class SensitiveCustomerRecord { + public String customerId; + public String fullName; + public String email; + public String ssn; + public String creditCardNumber; + public String billingAddress; + public String medicalNotes; + public String diagnosisCode; + public String prescriptions; + public String insuranceId; + public String processedBy; + + public SensitiveCustomerRecord() {} + } + + /** Builds a sample customer record with realistic-looking PII and PHI fields. */ + public static SensitiveCustomerRecord createSensitiveCustomerRecord() { + SensitiveCustomerRecord r = new SensitiveCustomerRecord(); + r.customerId = "cust_8a7f3b2e"; + r.fullName = "Jane A. Doe"; + r.email = "jane.doe@example.com"; + r.ssn = "123-45-6789"; + r.creditCardNumber = "4111-1111-1111-1111"; + r.billingAddress = "1234 Elm Street, Springfield, IL 62701"; + r.medicalNotes = + "Patient presents with hypertension and type-2 diabetes. Advised dietary changes and " + + "increased physical activity. 
Follow-up scheduled in 3 months."; + r.diagnosisCode = "I10, E11.9"; + r.prescriptions = "Lisinopril 10mg once daily; Metformin 500mg twice daily"; + r.insuranceId = "INS-987654321"; + r.processedBy = "workflow-processor-v2"; + return r; + } + + // ---------------- Workflow + activity ---------------- + + public interface WorkflowIface { + + @WorkflowMethod( + name = DataConverterConstants.ENCRYPTION_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 60, + taskList = DataConverterConstants.TASK_LIST_ENCRYPTION) + SensitiveCustomerRecord run(); + } + + public interface Activities { + + @ActivityMethod(scheduleToCloseTimeoutSeconds = 60) + SensitiveCustomerRecord processCustomerRecord(SensitiveCustomerRecord record); + } + + public static final class WorkflowImpl implements WorkflowIface { + + private final Activities activities = + Workflow.newActivityStub( + Activities.class, + new ActivityOptions.Builder() + .setScheduleToStartTimeout(Duration.ofMinutes(1)) + .setStartToCloseTimeout(Duration.ofMinutes(1)) + .build()); + + @Override + public SensitiveCustomerRecord run() { + SensitiveCustomerRecord record = createSensitiveCustomerRecord(); + + Workflow.getLogger(EncryptedDataConverterWorkflow.class) + .info( + "Encryption workflow started: customer_id={}. All PII/PHI will be encrypted before storage.", + record.customerId); + + SensitiveCustomerRecord result = activities.processCustomerRecord(record); + + Workflow.getLogger(EncryptedDataConverterWorkflow.class) + .info( + "Encryption workflow completed: customer_id={}. 
PII/PHI was automatically AES-256-GCM encrypted/decrypted.", + result.customerId); + return result; + } + } + + public static final class ActivitiesImpl implements Activities { + + @Override + public SensitiveCustomerRecord processCustomerRecord(SensitiveCustomerRecord record) { + record.processedBy = record.processedBy + " (Encrypted)"; + return record; + } + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java new file mode 100644 index 00000000..d023426c --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java @@ -0,0 +1,124 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.DataConverterException; +import com.uber.cadence.converter.JsonDataConverter; +import java.lang.reflect.Type; +import java.security.GeneralSecurityException; +import java.security.SecureRandom; +import javax.crypto.Cipher; +import javax.crypto.spec.GCMParameterSpec; +import javax.crypto.spec.SecretKeySpec; + +/** + * {@link DataConverter} that JSON-encodes via {@link JsonDataConverter} and then encrypts with + * AES-256-GCM. + * + *

Every workflow input, output, and activity parameter is encrypted before being written to + * Cadence history. Without the key, the data stored by the Cadence server — including any operator + * browsing workflow history — is completely opaque. + * + *

Output layout: {@code nonce(12 bytes) || ciphertext+tag(16 bytes)}. The random nonce means + * the same plaintext produces different ciphertext on every call, preventing replay detection by + * an attacker who observes Cadence history. The GCM authentication tag ensures any ciphertext + * tampering is detected at decode time. + */ +public final class EncryptedJsonDataConverter implements DataConverter { + + private static final DataConverter delegate = JsonDataConverter.getInstance(); + private static final String TRANSFORM = "AES/GCM/NoPadding"; + private static final int NONCE_BYTES = 12; + private static final int TAG_BITS = 128; + + private final SecretKeySpec key; + private final SecureRandom random = new SecureRandom(); + + /** + * @param keyBytes 32-byte AES-256 key. The caller is responsible for sourcing this from a + * secrets manager in production; see {@link EncryptionKeyLoader}. + * @throws IllegalArgumentException if the key is not 32 bytes. + */ + public EncryptedJsonDataConverter(byte[] keyBytes) { + if (keyBytes == null || keyBytes.length != 32) { + throw new IllegalArgumentException( + "AES-256 key must be exactly 32 bytes, got " + (keyBytes == null ? 0 : keyBytes.length)); + } + this.key = new SecretKeySpec(keyBytes, "AES"); + } + + @Override + public byte[] toData(Object... 
values) throws DataConverterException { + if (values == null || values.length == 0) { + return null; + } + byte[] jsonBytes = delegate.toData(values); + if (jsonBytes == null || jsonBytes.length == 0) { + return jsonBytes; + } + try { + byte[] nonce = new byte[NONCE_BYTES]; + random.nextBytes(nonce); + Cipher cipher = Cipher.getInstance(TRANSFORM); + cipher.init(Cipher.ENCRYPT_MODE, key, new GCMParameterSpec(TAG_BITS, nonce)); + byte[] ciphertext = cipher.doFinal(jsonBytes); + + byte[] out = new byte[NONCE_BYTES + ciphertext.length]; + System.arraycopy(nonce, 0, out, 0, NONCE_BYTES); + System.arraycopy(ciphertext, 0, out, NONCE_BYTES, ciphertext.length); + return out; + } catch (GeneralSecurityException e) { + throw new DataConverterException("Failed to AES-256-GCM encrypt payload", e); + } + } + + @Override + public T fromData(byte[] content, Class valueClass, Type valueType) + throws DataConverterException { + if (content == null || content.length == 0) { + return delegate.fromData(content, valueClass, valueType); + } + return delegate.fromData(decrypt(content), valueClass, valueType); + } + + @Override + public Object[] fromDataArray(byte[] content, Type... 
valueTypes) throws DataConverterException { + if (content == null || content.length == 0) { + return delegate.fromDataArray(content, valueTypes); + } + return delegate.fromDataArray(decrypt(content), valueTypes); + } + + private byte[] decrypt(byte[] content) throws DataConverterException { + if (content.length < NONCE_BYTES) { + throw new DataConverterException( + "Ciphertext too short: " + content.length + " bytes (need at least " + NONCE_BYTES + ")", + null); + } + try { + byte[] nonce = new byte[NONCE_BYTES]; + System.arraycopy(content, 0, nonce, 0, NONCE_BYTES); + Cipher cipher = Cipher.getInstance(TRANSFORM); + cipher.init(Cipher.DECRYPT_MODE, key, new GCMParameterSpec(TAG_BITS, nonce)); + return cipher.doFinal(content, NONCE_BYTES, content.length - NONCE_BYTES); + } catch (GeneralSecurityException e) { + throw new DataConverterException("Failed to AES-256-GCM decrypt payload", e); + } + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java new file mode 100644 index 00000000..720de46d --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java @@ -0,0 +1,83 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.uber.cadence.samples.dataconverter; + +import java.nio.charset.StandardCharsets; + +/** + * Loads the 32-byte AES-256 key for {@link EncryptedJsonDataConverter}. + * + *

/**
 * Loads the 32-byte AES-256 key for {@link EncryptedJsonDataConverter}.
 *
 * <p>The key is read from the {@code CADENCE_ENCRYPTION_KEY} environment variable as 64 ASCII hex
 * characters (32 bytes). If the variable is unset or empty, a hardcoded demo key is returned and a
 * warning is printed. If the variable is set but malformed, loading fails instead of silently
 * falling back to the public demo key.
 */
public final class EncryptionKeyLoader {

  private static final String ENV_VAR = "CADENCE_ENCRYPTION_KEY";
  private static final int KEY_BYTES = 32;

  private EncryptionKeyLoader() {}

  /** Hardcoded 32-byte key used ONLY when {@code CADENCE_ENCRYPTION_KEY} is unset. */
  static final byte[] DEMO_ENCRYPTION_KEY =
      "cadence-demo-key-NOT-FOR-PROD!!!".getBytes(StandardCharsets.US_ASCII);

  /**
   * Returns a 32-byte AES-256 key from {@code CADENCE_ENCRYPTION_KEY}, or a copy of the demo key
   * when the variable is unset or empty.
   *
   * @return a new 32-byte array owned by the caller
   * @throws IllegalStateException if the variable is set but is not valid hex or does not decode to
   *     exactly 32 bytes
   */
  public static byte[] loadEncryptionKey() {
    String hexKey = System.getenv(ENV_VAR);
    if (hexKey == null || hexKey.isEmpty()) {
      System.out.println("WARNING: CADENCE_ENCRYPTION_KEY not set. Using hardcoded demo key.");
      System.out.println("WARNING: DO NOT USE THE DEMO KEY IN PRODUCTION.");
      // Clone so callers can never modify the shared constant.
      return DEMO_ENCRYPTION_KEY.clone();
    }
    byte[] key;
    try {
      key = hexDecode(hexKey);
    } catch (IllegalArgumentException e) {
      throw new IllegalStateException(
          "CADENCE_ENCRYPTION_KEY is not valid hex: " + e.getMessage(), e);
    }
    if (key.length != KEY_BYTES) {
      // Only lengths are reported; the key material itself is never put in the message.
      throw new IllegalStateException(
          "CADENCE_ENCRYPTION_KEY must be exactly 64 hex chars (32 bytes), got "
              + hexKey.length()
              + " hex chars ("
              + key.length
              + " bytes)");
    }
    return key;
  }

  /**
   * Decodes a string of ASCII hex digit pairs into bytes.
   *
   * @throws IllegalArgumentException if the length is odd or a character is not in
   *     {@code [0-9a-fA-F]}
   */
  private static byte[] hexDecode(String s) {
    int len = s.length();
    if ((len & 1) != 0) {
      throw new IllegalArgumentException("odd-length hex string");
    }
    byte[] out = new byte[len / 2];
    for (int i = 0; i < len; i += 2) {
      int hi = hexValue(s.charAt(i));
      int lo = hexValue(s.charAt(i + 1));
      if (hi < 0 || lo < 0) {
        throw new IllegalArgumentException("non-hex character at offset " + i);
      }
      out[i / 2] = (byte) ((hi << 4) | lo);
    }
    return out;
  }

  /**
   * Returns the value of an ASCII hex digit, or -1 for any other character.
   *
   * <p>{@code Character.digit} is deliberately not used: it also accepts non-ASCII Unicode digits
   * and full-width letters, which must not be treated as valid key material.
   */
  private static int hexValue(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }
}
See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowOptions; +import java.time.Duration; +import java.util.UUID; + +/** + * Starts {@link EncryptedDataConverterWorkflow} (async, fire-and-forget). + * + *

The workflow takes no inputs and generates its own payload, so this starter does not need + * the encryption key — the worker owns the key. The same effect can be achieved from the Cadence + * CLI via: + * + *

+ * cadence --domain samples-domain \
+ *   workflow start \
+ *   --workflow_type EncryptionDataConverterWorkflow \
+ *   --tl data-encryption \
+ *   --et 60
+ * 
+ */ +public final class EncryptionStarter { + + private EncryptionStarter() {} + + public static void main(String[] args) { + try { + WorkflowClient client = DataConverterSupport.newWorkflowClient(); + WorkflowOptions options = + new WorkflowOptions.Builder() + .setTaskList(DataConverterConstants.TASK_LIST_ENCRYPTION) + .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) + .setWorkflowId("encryption-" + UUID.randomUUID()) + .build(); + + EncryptedDataConverterWorkflow.WorkflowIface workflow = + client.newWorkflowStub(EncryptedDataConverterWorkflow.WorkflowIface.class, options); + + WorkflowClient.start(workflow::run); + System.out.println( + "Started EncryptedDataConverterWorkflow on task list \"" + + DataConverterConstants.TASK_LIST_ENCRYPTION + + "\"."); + System.exit(0); + } catch (RuntimeException e) { + if (DataConverterSupport.printHintIfDomainMissing(e)) { + System.exit(1); + } + throw e; + } + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java b/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java new file mode 100644 index 00000000..d60a0971 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java @@ -0,0 +1,74 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.uber.cadence.samples.dataconverter; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * {@link BlobStore} implementation backed by the local filesystem. + * + *

The default zero-config implementation used by {@link S3OffloadDataConverter} when running + * the demo without real AWS. Files are written under {@code + * ${java.io.tmpdir}/cadence-java-samples-data-s3/}. + */ +public final class LocalFsBlobStore implements BlobStore { + + private final Path baseDir; + + public LocalFsBlobStore() { + this(Paths.get(System.getProperty("java.io.tmpdir"), "cadence-java-samples-data-s3")); + } + + public LocalFsBlobStore(Path baseDir) { + this.baseDir = baseDir; + try { + Files.createDirectories(baseDir); + } catch (IOException e) { + throw new IllegalStateException("Failed to create blob store dir " + baseDir, e); + } + } + + /** Returns the directory the store writes to (useful for stats banners). */ + public Path baseDir() { + return baseDir; + } + + @Override + public void put(String key, byte[] data) throws IOException { + Files.write(baseDir.resolve(sanitizeKey(key)), data); + } + + @Override + public byte[] get(String key) throws IOException { + return Files.readAllBytes(baseDir.resolve(sanitizeKey(key))); + } + + /** + * Turns a {@code bucket/sha256hex} key into a single safe filename. Keys are always generated + * internally by the DataConverter, but this provides a belt-and-suspenders guarantee against + * directory traversal in case a future caller passes a user-controlled key. + */ + private static String sanitizeKey(String key) { + String flat = key.replace('/', '_').replace('\\', '_'); + int slash = Math.max(flat.lastIndexOf('/'), flat.lastIndexOf('\\')); + return slash >= 0 ? 
flat.substring(slash + 1) : flat; + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/README.md b/src/main/java/com/uber/cadence/samples/dataconverter/README.md new file mode 100644 index 00000000..1492847a --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/README.md @@ -0,0 +1,170 @@ +# DataConverter Samples + +Three production-ready patterns for custom `DataConverter` implementations in the Cadence Java client: **compression**, **encryption**, and **S3 / claim-check offload**. A `DataConverter` controls how every workflow input, output, and activity parameter is serialized before it is written to Cadence history — making it the right place to add compression, encryption, or external offloading without changing any workflow or activity code. + +## What is a DataConverter? + +`com.uber.cadence.converter.DataConverter` defines three methods: + +- `byte[] toData(Object... values)` — called before data is written to Cadence history. +- ` T fromData(byte[] content, Class valueClass, Type valueType)` — called for single-value payloads (workflow/activity results, internal payloads). +- `Object[] fromDataArray(byte[] content, Type... valueTypes)` — called to decode workflow/activity argument lists on the worker side. + +The same `DataConverter` must be used by **both the worker and any client that sends or receives non-trivial workflow data**. In these samples the workflows generate their payloads internally and take no inputs, so they can be started from the Cadence CLI without bundling a custom converter into the CLI itself. + +Each sample uses its own task list so it can have its own `DataConverter`. `DataConverterWorker` starts one worker per task list in a single process. + +## Prerequisites + +1. Cadence server running (e.g. Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). +2. From the repo root, build: `./gradlew build`. 
+ +### Register the domain (required once per cluster) + +Starters use domain **`samples-domain`**. If you see `Domain samples-domain does not exist`, register it **before** starting workflows: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain +``` + +Or with the Cadence CLI: + +```bash +cadence --domain samples-domain domain register +``` + +See also the root [README.md](../../../../../../../../README.md). + +## Run the worker (terminal 1) + +Leave this process running. It starts three workers — one per `DataConverter` — and prints a stats banner per sample: + +```bash +cd /path/to/cadence-java-samples +./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.DataConverterWorker +``` + +## Start a workflow (terminal 2) + +Run **one** of the starters per sample run. Each starts a new workflow execution and exits. + +**Compression** — gzip-over-JSON: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.CompressionStarter +``` + +**Encryption** — AES-256-GCM: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.EncryptionStarter +``` + +**S3 offload** — claim-check pattern: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.S3OffloadStarter +``` + +You can also start any of the three from the Cadence CLI; the commands are printed in the worker's stats banner on startup. + +--- + +## Compression Sample + +`CompressedDataConverterWorkflow` demonstrates gzip-over-JSON compression. For repetitive JSON data this typically achieves 60–80% size reduction, lowering storage cost and bandwidth for large workflow payloads. The converter is implemented in [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) — it wraps `JsonDataConverter.getInstance()` and post-processes the resulting bytes through `java.util.zip.GZIP*Stream`. 
+ +- **Task list:** `data-compression` +- **Workflow type:** `CompressionDataConverterWorkflow` + +--- + +## Encryption Sample + +`EncryptedDataConverterWorkflow` demonstrates AES-256-GCM encryption. Every workflow input, output, and activity parameter is encrypted before being written to Cadence history. Without the key, the data stored by the Cadence server — including any operators browsing workflow history — is completely opaque. + +The sample uses a `SensitiveCustomerRecord` containing realistic PII and PHI fields (name, email, SSN, credit card, medical notes) to make the use case concrete. + +- **Task list:** `data-encryption` +- **Workflow type:** `EncryptionDataConverterWorkflow` + +### Encryption key + +By default, the worker uses a hardcoded demo key and prints a prominent warning. To use your own key: + +```bash +export CADENCE_ENCRYPTION_KEY=$(openssl rand -hex 32) +./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.DataConverterWorker +``` + +> **WARNING:** The hardcoded demo key (`cadence-demo-key-NOT-FOR-PROD!!!`) is public. Never use it in production. In production, load your key from a secrets manager (AWS Secrets Manager, HashiCorp Vault, GCP Secret Manager, etc.). + +### How AES-256-GCM works + +- `toData`: JSON-encode arguments → generate a 12-byte random nonce → `Cipher.doFinal` with `AES/GCM/NoPadding` → return `nonce || ciphertext+tag`. +- `fromData` / `fromDataArray`: split nonce from input → `Cipher.doFinal` (decrypt) → JSON-decode. + +The GCM authentication tag (16 bytes) ensures any ciphertext tampering is detected. The random nonce means the same plaintext produces different ciphertext on every call, so an attacker observing Cadence history cannot tell when two payloads are identical.
+ +--- + +## S3 Offload Sample (claim-check pattern) + +`S3OffloadDataConverterWorkflow` demonstrates the *claim-check* pattern: payloads larger than a configurable threshold are stored in an external [`BlobStore`](BlobStore.java) and only a small reference (on the order of a hundred bytes) travels through Cadence workflow history. This works around Cadence's per-payload size limit (~2 MB) for workflows that pass very large datasets between the workflow and its activities. + +- **Task list:** `data-s3` +- **Workflow type:** `S3OffloadDataConverterWorkflow` + +### How it works + +- `toData`: JSON-encode → if `len(json) > thresholdBytes`, upload to `BlobStore` under a SHA-256 key and return `0x01 || {"__s3_ref":"<bucket>/<sha256-hex>"}`. Otherwise return `0x00 || json` inline. +- `fromData` / `fromDataArray`: read prefix byte → if `0x01`, fetch from `BlobStore` and decode; if `0x00`, decode inline. + +SHA-256-of-payload is used as the key so `toData` is idempotent across Cadence workflow replays. Using a fresh UUID per call would write a new orphaned blob on every replay. + +### Default store (zero-config) + +Out of the box, [`LocalFsBlobStore`](LocalFsBlobStore.java) writes blobs to `${java.io.tmpdir}/cadence-java-samples-data-s3/`. No cloud credentials or additional dependencies are needed. + +### Swapping in real AWS S3 + +The top of [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) contains a commented `S3BlobStore` skeleton showing the AWS SDK v2 calls needed. To enable it: + +1. Add AWS SDK v2 to `build.gradle`: + ```groovy + implementation group: 'software.amazon.awssdk', name: 's3', version: '2.25.0' + ``` +2. Implement `BlobStore` against `software.amazon.awssdk.services.s3.S3Client` (the commented stub shows the exact calls). +3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in `DataConverterWorker`. +4. Set standard AWS environment variables (`AWS_REGION`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`) or use an IAM instance role. 
+ +You can also point the SDK at [LocalStack](https://localstack.cloud/) or [MinIO](https://min.io/) for local testing without a real AWS account. + +> **Note on cleanup:** `S3OffloadDataConverter` does not delete blobs after the workflow completes. In production, use S3 object lifecycle policies to automatically expire old blobs. + +--- + +## When to use which pattern + +| Pattern | Best for | +|---------|----------| +| **Compression** | Large repetitive JSON payloads; reducing storage cost without confidentiality requirements | +| **Encryption** | PII, PHI, secrets, or any data that must be unreadable in Cadence history | +| **S3 Offload** | Payloads approaching Cadence's size limits; binary or non-JSON data; cost-conscious archival | + +Patterns can be composed: compress-then-encrypt (compress first, because ciphertext is effectively incompressible), or encrypt-then-offload to S3 for maximum security and minimum history size. + +## Source layout + +| File | Purpose | +|------|---------| +| [`DataConverterConstants.java`](DataConverterConstants.java) | Task list and workflow type names plus the shared Cadence domain | +| [`DataConverterSupport.java`](DataConverterSupport.java) | Shared `WorkflowClient` factory + friendly "domain missing" hint | +| [`DataConverterWorker.java`](DataConverterWorker.java) | Hosts all three workers; prints stats banners on startup | +| [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) | gzip-over-JSON `DataConverter` | +| [`EncryptedJsonDataConverter.java`](EncryptedJsonDataConverter.java) | AES-256-GCM `DataConverter` | +| [`EncryptionKeyLoader.java`](EncryptionKeyLoader.java) | Reads `CADENCE_ENCRYPTION_KEY` with demo-key fallback | +| [`BlobStore.java`](BlobStore.java) / [`LocalFsBlobStore.java`](LocalFsBlobStore.java) | `BlobStore` abstraction + local-FS default | +| [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) | Claim-check `DataConverter` with commented AWS S3 stub | +| `*DataConverterWorkflow.java` | One workflow + activity per sample (each takes no inputs) 
| +| `*Starter.java` | Thin async starters mirroring the existing `query/` samples | diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java new file mode 100644 index 00000000..c8cc55ee --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java @@ -0,0 +1,222 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.DataConverterException; +import com.uber.cadence.converter.JsonDataConverter; +import java.io.IOException; +import java.lang.reflect.Type; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/** + * {@link DataConverter} that implements the claim-check pattern: payloads larger than + * {@code thresholdBytes} are stored in an external {@link BlobStore} and only a small reference + * travels through Cadence workflow history. + * + *

This solves the practical problem of Cadence's per-payload size limits (~2 MB) for workflows + * that must pass very large datasets between the workflow and its activities, and reduces history + * storage cost for long-running workflows that pass large repeatable data. + * + *

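Wire format (after the JSON delegate produces the bytes): + * + * <ul> + *   <li>{@code 0x00 || json} — payload is small enough to inline. + *   <li>{@code 0x01 || jsonEnvelope} — payload was offloaded; the envelope JSON has the form + *       {@code {"__s3_ref":"<bucket>/<sha256-hex>"}}. + * </ul> + * + *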

Keys are derived from the SHA-256 of the payload so {@code toData} is idempotent across + * Cadence workflow replays. Using a fresh UUID per call would write a new orphaned blob on every + * replay because the SDK calls {@code toData} again each time the workflow re-executes from the + * top. If the workflow needs to control the key (e.g. to encode routing metadata), generate it + * with {@code Workflow.sideEffect} and pass it alongside the payload instead. + */ +/* + * ============================================================================= + * S3 BlobStore stub + * + * To use a real AWS S3 bucket instead of the local filesystem: + * 1. Add AWS SDK v2 to build.gradle: + * implementation group: 'software.amazon.awssdk', name: 's3', version: '2.25.0' + * 2. Implement BlobStore against software.amazon.awssdk.services.s3.S3Client: + * + * public final class S3BlobStore implements BlobStore { + * private final S3Client s3; + * private final String bucket; + * + * public S3BlobStore(String bucket, String region) { + * this.s3 = S3Client.builder().region(Region.of(region)).build(); + * this.bucket = bucket; + * } + * + * public void put(String key, byte[] data) { + * s3.putObject( + * PutObjectRequest.builder().bucket(bucket).key(key).build(), + * RequestBody.fromBytes(data)); + * } + * + * public byte[] get(String key) { + * return s3.getObjectAsBytes(GetObjectRequest.builder().bucket(bucket).key(key).build()) + * .asByteArray(); + * } + * } + * + * 3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in + * DataConverterWorker. + * 4. Set standard AWS env vars (AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) or use an + * IAM instance role. + * + * You can also point the SDK at LocalStack or MinIO for local testing without a real AWS account. + * + * Note on cleanup: this DataConverter does not delete blobs after the workflow completes. In + * production, use S3 object lifecycle policies to automatically expire old blobs. 
+ * ============================================================================= + */ +public final class S3OffloadDataConverter implements DataConverter { + + /** Prefix byte for inline (below-threshold) payloads. */ + static final byte INLINE_PREFIX = (byte) 0x00; + + /** Prefix byte for offloaded payloads. */ + static final byte OFFLOAD_PREFIX = (byte) 0x01; + + private static final DataConverter delegate = JsonDataConverter.getInstance(); + + private final BlobStore store; + private final String bucket; + private final int thresholdBytes; + + /** + * @param store the BlobStore backend (use {@link LocalFsBlobStore} for zero-config demo). + * @param bucket logical bucket / prefix name embedded in the reference key. + * @param thresholdBytes max inline payload size; larger payloads are offloaded. + */ + public S3OffloadDataConverter(BlobStore store, String bucket, int thresholdBytes) { + this.store = store; + this.bucket = bucket; + this.thresholdBytes = thresholdBytes; + } + + @Override + public byte[] toData(Object... 
values) throws DataConverterException { + if (values == null || values.length == 0) { + return null; + } + byte[] jsonBytes = delegate.toData(values); + if (jsonBytes == null || jsonBytes.length == 0) { + return jsonBytes; + } + + if (jsonBytes.length <= thresholdBytes) { + byte[] result = new byte[1 + jsonBytes.length]; + result[0] = INLINE_PREFIX; + System.arraycopy(jsonBytes, 0, result, 1, jsonBytes.length); + return result; + } + + String key = bucket + "/" + sha256Hex(jsonBytes); + try { + store.put(key, jsonBytes); + } catch (IOException e) { + throw new DataConverterException( + "Failed to offload payload to blob store (key=" + key + ")", e); + } + + String envelope = "{\"__s3_ref\":\"" + key + "\"}"; + byte[] envBytes = envelope.getBytes(StandardCharsets.UTF_8); + byte[] result = new byte[1 + envBytes.length]; + result[0] = OFFLOAD_PREFIX; + System.arraycopy(envBytes, 0, result, 1, envBytes.length); + return result; + } + + @Override + public T fromData(byte[] content, Class valueClass, Type valueType) + throws DataConverterException { + byte[] payload = unwrap(content); + return delegate.fromData(payload, valueClass, valueType); + } + + @Override + public Object[] fromDataArray(byte[] content, Type... 
valueTypes) throws DataConverterException { + byte[] payload = unwrap(content); + return delegate.fromDataArray(payload, valueTypes); + } + + private byte[] unwrap(byte[] content) throws DataConverterException { + if (content == null || content.length == 0) { + return content; + } + byte prefix = content[0]; + byte[] body = new byte[content.length - 1]; + System.arraycopy(content, 1, body, 0, body.length); + + switch (prefix) { + case INLINE_PREFIX: + return body; + case OFFLOAD_PREFIX: + String key = extractS3Ref(new String(body, StandardCharsets.UTF_8)); + try { + return store.get(key); + } catch (IOException e) { + throw new DataConverterException( + "s3 offload: failed to fetch payload from blob store (key=" + key + ")", e); + } + default: + throw new DataConverterException( + "s3 offload: unknown prefix byte 0x" + String.format("%02x", prefix & 0xff), null); + } + } + + /** + * Extracts the value of {@code __s3_ref} from the envelope JSON without bringing in a JSON + * parser. The envelope is produced by this class, so the format is fixed and trivially parseable. 
+ */ + private static String extractS3Ref(String envelopeJson) throws DataConverterException { + String marker = "\"__s3_ref\":\""; + int start = envelopeJson.indexOf(marker); + if (start < 0) { + throw new DataConverterException( + "s3 offload: envelope missing __s3_ref field: " + envelopeJson, null); + } + start += marker.length(); + int end = envelopeJson.indexOf('"', start); + if (end < 0) { + throw new DataConverterException( + "s3 offload: envelope __s3_ref field is unterminated: " + envelopeJson, null); + } + return envelopeJson.substring(start, end); + } + + private static String sha256Hex(byte[] data) throws DataConverterException { + try { + MessageDigest md = MessageDigest.getInstance("SHA-256"); + byte[] digest = md.digest(data); + StringBuilder sb = new StringBuilder(digest.length * 2); + for (byte b : digest) { + sb.append(String.format("%02x", b & 0xff)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + throw new DataConverterException("SHA-256 is not available in this JVM", e); + } + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java new file mode 100644 index 00000000..215fd320 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java @@ -0,0 +1,153 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.activity.ActivityMethod; +import com.uber.cadence.activity.ActivityOptions; +import com.uber.cadence.workflow.Workflow; +import com.uber.cadence.workflow.WorkflowMethod; +import java.time.Duration; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Demonstrates the claim-check pattern: payloads larger than the configured threshold are stored + * in an external {@link BlobStore} and only a small reference travels through Cadence history. + * + *

The workflow takes no inputs and builds a payload well above the threshold internally so it + * can be started from the Cadence CLI and every run exercises the offload path. + */ +public final class S3OffloadDataConverterWorkflow { + + private S3OffloadDataConverterWorkflow() {} + + // ---------------- POJOs ---------------- + + public static final class S3LargePayload { + public String jobId; + public String description; + public List dataPoints; + public Map metadata; + public String processedBy; + + public S3LargePayload() {} + } + + public static final class S3DataPoint { + public String timestamp; + public String metric; + public double value; + public String tags; + + public S3DataPoint() {} + } + + /** + * Builds a payload comfortably larger than {@link + * DataConverterConstants#S3_DEFAULT_THRESHOLD_BYTES} so every workflow run triggers an offload. + */ + public static S3LargePayload createS3LargePayload() { + S3LargePayload p = new S3LargePayload(); + p.jobId = "batch-job-20240115-001"; + p.description = repeat( + "Large telemetry batch job containing sensor readings from the production cluster. 
", 10); + + p.dataPoints = new ArrayList<>(200); + for (int i = 0; i < 200; i++) { + S3DataPoint dp = new S3DataPoint(); + dp.timestamp = String.format("2024-01-15T%02d:30:00Z", i % 24); + dp.metric = String.format("telemetry.sensor_%03d.temperature", i); + dp.value = 20.0 + (i % 30) / 10.0; + dp.tags = String.format("region=us-east-1,host=node-%03d,env=production", i % 10); + p.dataPoints.add(dp); + } + + p.metadata = new LinkedHashMap<>(); + for (int i = 0; i < 20; i++) { + p.metadata.put(String.format("batch_key_%02d", i), repeat("value-data-", 5)); + } + p.processedBy = "s3-offload-worker-v1"; + return p; + } + + private static String repeat(String s, int n) { + StringBuilder sb = new StringBuilder(s.length() * n); + for (int i = 0; i < n; i++) { + sb.append(s); + } + return sb.toString(); + } + + // ---------------- Workflow + activity ---------------- + + public interface WorkflowIface { + + @WorkflowMethod( + name = DataConverterConstants.S3_OFFLOAD_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 60, + taskList = DataConverterConstants.TASK_LIST_S3) + S3LargePayload run(); + } + + public interface Activities { + + @ActivityMethod(scheduleToCloseTimeoutSeconds = 60) + S3LargePayload processS3Payload(S3LargePayload payload); + } + + public static final class WorkflowImpl implements WorkflowIface { + + private final Activities activities = + Workflow.newActivityStub( + Activities.class, + new ActivityOptions.Builder() + .setScheduleToStartTimeout(Duration.ofMinutes(1)) + .setStartToCloseTimeout(Duration.ofMinutes(1)) + .build()); + + @Override + public S3LargePayload run() { + S3LargePayload payload = createS3LargePayload(); + + Workflow.getLogger(S3OffloadDataConverterWorkflow.class) + .info( + "S3 offload workflow started: job_id={}, data_points={}. 
Payload will be offloaded; only a reference travels through Cadence history.", + payload.jobId, + payload.dataPoints.size()); + + S3LargePayload result = activities.processS3Payload(payload); + + Workflow.getLogger(S3OffloadDataConverterWorkflow.class) + .info( + "S3 offload workflow completed: job_id={}. Payload was transparently offloaded and retrieved via the BlobStore.", + result.jobId); + return result; + } + } + + public static final class ActivitiesImpl implements Activities { + + @Override + public S3LargePayload processS3Payload(S3LargePayload payload) { + payload.processedBy = payload.processedBy + " (Processed)"; + return payload; + } + } +} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java new file mode 100644 index 00000000..a8535812 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java @@ -0,0 +1,70 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.dataconverter; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowOptions; +import java.time.Duration; +import java.util.UUID; + +/** + * Starts {@link S3OffloadDataConverterWorkflow} (async, fire-and-forget). + * + *

The workflow takes no inputs and generates its own payload, so this starter does not need to + * use the matching {@link S3OffloadDataConverter}. The same effect can be achieved from the + * Cadence CLI via: + * + *

+ * cadence --domain samples-domain \
+ *   workflow start \
+ *   --workflow_type S3OffloadDataConverterWorkflow \
+ *   --tl data-s3 \
+ *   --et 60
+ * 
+ */ +public final class S3OffloadStarter { + + private S3OffloadStarter() {} + + public static void main(String[] args) { + try { + WorkflowClient client = DataConverterSupport.newWorkflowClient(); + WorkflowOptions options = + new WorkflowOptions.Builder() + .setTaskList(DataConverterConstants.TASK_LIST_S3) + .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) + .setWorkflowId("s3-offload-" + UUID.randomUUID()) + .build(); + + S3OffloadDataConverterWorkflow.WorkflowIface workflow = + client.newWorkflowStub(S3OffloadDataConverterWorkflow.WorkflowIface.class, options); + + WorkflowClient.start(workflow::run); + System.out.println( + "Started S3OffloadDataConverterWorkflow on task list \"" + + DataConverterConstants.TASK_LIST_S3 + + "\"."); + System.exit(0); + } catch (RuntimeException e) { + if (DataConverterSupport.printHintIfDomainMissing(e)) { + System.exit(1); + } + throw e; + } + } +} From bf7bc7d4be323d1bfbc19bbb4ed812ead01895bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Mon, 11 May 2026 10:51:47 -0700 Subject: [PATCH 02/11] fix(dataconverter): harden custom converter payload handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- .../CompressedJsonDataConverter.java | 27 ++++++++- .../dataconverter/LocalFsBlobStore.java | 45 ++++++++++----- .../dataconverter/S3OffloadDataConverter.java | 56 ++++++++++--------- 3 files changed, 86 insertions(+), 42 deletions(-) diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java index e3cbd068..dcf600d6 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java @@ -34,11 +34,28 @@ *

For repetitive JSON payloads this typically achieves 60-80% size reduction, lowering storage * cost and bandwidth without changing any workflow or activity code. Apply by setting it on the * {@code WorkflowClientOptions} used by both the worker and any client that triggers the workflow. + * The decode path caps decompressed payloads to avoid unbounded memory growth on malformed input. */ public final class CompressedJsonDataConverter implements DataConverter { + /** Production code should choose a limit appropriate for its workflow payload contract. */ + public static final int DEFAULT_MAX_DECOMPRESSED_BYTES = 10 * 1024 * 1024; + private static final DataConverter delegate = JsonDataConverter.getInstance(); + private final int maxDecompressedBytes; + + public CompressedJsonDataConverter() { + this(DEFAULT_MAX_DECOMPRESSED_BYTES); + } + + public CompressedJsonDataConverter(int maxDecompressedBytes) { + if (maxDecompressedBytes <= 0) { + throw new IllegalArgumentException("maxDecompressedBytes must be positive"); + } + this.maxDecompressedBytes = maxDecompressedBytes; + } + @Override public byte[] toData(Object... values) throws DataConverterException { if (values == null || values.length == 0) { @@ -65,7 +82,7 @@ public T fromData(byte[] content, Class valueClass, Type valueType) if (content == null || content.length == 0) { return delegate.fromData(content, valueClass, valueType); } - return delegate.fromData(decompress(content), valueClass, valueType); + return delegate.fromData(decompress(content, maxDecompressedBytes), valueClass, valueType); } @Override @@ -73,15 +90,19 @@ public Object[] fromDataArray(byte[] content, Type... 
valueTypes) throws DataCon if (content == null || content.length == 0) { return delegate.fromDataArray(content, valueTypes); } - return delegate.fromDataArray(decompress(content), valueTypes); + return delegate.fromDataArray(decompress(content, maxDecompressedBytes), valueTypes); } - private static byte[] decompress(byte[] content) throws DataConverterException { + private static byte[] decompress(byte[] content, int maxBytes) throws DataConverterException { try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(content)); ByteArrayOutputStream out = new ByteArrayOutputStream()) { byte[] buf = new byte[4096]; int read; while ((read = gzip.read(buf)) != -1) { + if (out.size() > maxBytes - read) { + throw new DataConverterException( + "Gunzip payload exceeds maximum size of " + maxBytes + " bytes", null); + } out.write(buf, 0, read); } return out.toByteArray(); diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java b/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java index d60a0971..b066e5ec 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java @@ -18,15 +18,18 @@ package com.uber.cadence.samples.dataconverter; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; /** * {@link BlobStore} implementation backed by the local filesystem. * - *

The default zero-config implementation used by {@link S3OffloadDataConverter} when running - * the demo without real AWS. Files are written under {@code + *

The default zero-config implementation used by {@link S3OffloadDataConverter} when running the + * demo without real AWS. Files are written under {@code * ${java.io.tmpdir}/cadence-java-samples-data-s3/}. */ public final class LocalFsBlobStore implements BlobStore { @@ -38,11 +41,14 @@ public LocalFsBlobStore() { } public LocalFsBlobStore(Path baseDir) { - this.baseDir = baseDir; + if (baseDir == null) { + throw new IllegalArgumentException("baseDir must not be null"); + } + this.baseDir = baseDir.toAbsolutePath().normalize(); try { - Files.createDirectories(baseDir); + Files.createDirectories(this.baseDir); } catch (IOException e) { - throw new IllegalStateException("Failed to create blob store dir " + baseDir, e); + throw new IllegalStateException("Failed to create blob store dir " + this.baseDir, e); } } @@ -53,22 +59,33 @@ public Path baseDir() { @Override public void put(String key, byte[] data) throws IOException { - Files.write(baseDir.resolve(sanitizeKey(key)), data); + Files.write(baseDir.resolve(filenameForKey(key)), data); } @Override public byte[] get(String key) throws IOException { - return Files.readAllBytes(baseDir.resolve(sanitizeKey(key))); + return Files.readAllBytes(baseDir.resolve(filenameForKey(key))); } /** - * Turns a {@code bucket/sha256hex} key into a single safe filename. Keys are always generated - * internally by the DataConverter, but this provides a belt-and-suspenders guarantee against - * directory traversal in case a future caller passes a user-controlled key. + * Turns any blob-store key into a fixed safe filename. Keys are usually generated internally by + * the DataConverter, but hashing prevents directory traversal even if a future caller passes a + * user-controlled key. */ - private static String sanitizeKey(String key) { - String flat = key.replace('/', '_').replace('\\', '_'); - int slash = Math.max(flat.lastIndexOf('/'), flat.lastIndexOf('\\')); - return slash >= 0 ? 
flat.substring(slash + 1) : flat; + private static String filenameForKey(String key) throws IOException { + if (key == null || key.isEmpty()) { + throw new IOException("BlobStore key must not be null or empty"); + } + try { + MessageDigest md = MessageDigest.getInstance("SHA-256"); + byte[] digest = md.digest(key.getBytes(StandardCharsets.UTF_8)); + StringBuilder sb = new StringBuilder(digest.length * 2); + for (byte b : digest) { + sb.append(String.format("%02x", b & 0xff)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + throw new IOException("SHA-256 is not available in this JVM", e); + } } } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java index c8cc55ee..f14c67bb 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java @@ -22,7 +22,6 @@ import com.uber.cadence.converter.JsonDataConverter; import java.io.IOException; import java.lang.reflect.Type; -import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -39,15 +38,15 @@ * *

    *
  • {@code 0x00 || json} — payload is small enough to inline. - *
  • {@code 0x01 || jsonEnvelope} — payload was offloaded; the envelope JSON has the form - * {@code {"__s3_ref":"/"}}. + *
  • {@code 0x01 || jsonEnvelope} — payload was offloaded; the envelope JSON has the form {@code + * {"s3Ref":"/"}}. *
* *

Keys are derived from the SHA-256 of the payload so {@code toData} is idempotent across * Cadence workflow replays. Using a fresh UUID per call would write a new orphaned blob on every * replay because the SDK calls {@code toData} again each time the workflow re-executes from the - * top. If the workflow needs to control the key (e.g. to encode routing metadata), generate it - * with {@code Workflow.sideEffect} and pass it alongside the payload instead. + * top. If the workflow needs to control the key (e.g. to encode routing metadata), generate it with + * {@code Workflow.sideEffect} and pass it alongside the payload instead. */ /* * ============================================================================= @@ -104,12 +103,31 @@ public final class S3OffloadDataConverter implements DataConverter { private final String bucket; private final int thresholdBytes; + static final class BlobReference { + public String s3Ref; + + public BlobReference() {} + + BlobReference(String s3Ref) { + this.s3Ref = s3Ref; + } + } + /** * @param store the BlobStore backend (use {@link LocalFsBlobStore} for zero-config demo). * @param bucket logical bucket / prefix name embedded in the reference key. * @param thresholdBytes max inline payload size; larger payloads are offloaded. */ public S3OffloadDataConverter(BlobStore store, String bucket, int thresholdBytes) { + if (store == null) { + throw new IllegalArgumentException("store must not be null"); + } + if (bucket == null || bucket.trim().isEmpty()) { + throw new IllegalArgumentException("bucket must not be null or empty"); + } + if (thresholdBytes < 0) { + throw new IllegalArgumentException("thresholdBytes must not be negative"); + } this.store = store; this.bucket = bucket; this.thresholdBytes = thresholdBytes; @@ -140,8 +158,7 @@ public byte[] toData(Object... 
values) throws DataConverterException { "Failed to offload payload to blob store (key=" + key + ")", e); } - String envelope = "{\"__s3_ref\":\"" + key + "\"}"; - byte[] envBytes = envelope.getBytes(StandardCharsets.UTF_8); + byte[] envBytes = delegate.toData(new BlobReference(key)); byte[] result = new byte[1 + envBytes.length]; result[0] = OFFLOAD_PREFIX; System.arraycopy(envBytes, 0, result, 1, envBytes.length); @@ -173,7 +190,7 @@ private byte[] unwrap(byte[] content) throws DataConverterException { case INLINE_PREFIX: return body; case OFFLOAD_PREFIX: - String key = extractS3Ref(new String(body, StandardCharsets.UTF_8)); + String key = extractS3Ref(body); try { return store.get(key); } catch (IOException e) { @@ -186,24 +203,13 @@ private byte[] unwrap(byte[] content) throws DataConverterException { } } - /** - * Extracts the value of {@code __s3_ref} from the envelope JSON without bringing in a JSON - * parser. The envelope is produced by this class, so the format is fixed and trivially parseable. 
- */ - private static String extractS3Ref(String envelopeJson) throws DataConverterException { - String marker = "\"__s3_ref\":\""; - int start = envelopeJson.indexOf(marker); - if (start < 0) { - throw new DataConverterException( - "s3 offload: envelope missing __s3_ref field: " + envelopeJson, null); - } - start += marker.length(); - int end = envelopeJson.indexOf('"', start); - if (end < 0) { - throw new DataConverterException( - "s3 offload: envelope __s3_ref field is unterminated: " + envelopeJson, null); + private static String extractS3Ref(byte[] envelopeJson) throws DataConverterException { + BlobReference reference = + delegate.fromData(envelopeJson, BlobReference.class, BlobReference.class); + if (reference == null || reference.s3Ref == null || reference.s3Ref.isEmpty()) { + throw new DataConverterException("s3 offload: envelope missing s3Ref field", null); } - return envelopeJson.substring(start, end); + return reference.s3Ref; } private static String sha256Hex(byte[] data) throws DataConverterException { From a5d80e6751426f94986278b0a40fd7564a762836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Mon, 11 May 2026 10:52:42 -0700 Subject: [PATCH 03/11] fix(dataconverter): align workflow type names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- .../dataconverter/CompressionStarter.java | 2 +- .../dataconverter/DataConverterConstants.java | 12 ++++----- .../dataconverter/DataConverterWorker.java | 27 +++++++++++-------- .../dataconverter/EncryptionStarter.java | 8 +++--- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java b/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java index fad3e3de..f2bf0172 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java +++ 
b/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java @@ -32,7 +32,7 @@ *

  * cadence --domain samples-domain \
  *   workflow start \
- *   --workflow_type CompressionDataConverterWorkflow \
+ *   --workflow_type CompressedDataConverterWorkflow \
  *   --tl data-compression \
  *   --et 60
  * 
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java index 7b798485..71c20b2b 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java @@ -22,10 +22,10 @@ /** * Shared identifiers for the DataConverter samples. * - *

Each of the three samples runs on its own task list so it can have its own - * {@code DataConverter}. {@code DataConverter} is bound to a {@code WorkflowClient}, and each task - * list maps to one worker built from one client; that is why one process needs three clients to - * host all three samples. + *

Each of the three samples runs on its own task list so it can have its own {@code + * DataConverter}. {@code DataConverter} is bound to a {@code WorkflowClient}, and each task list + * maps to one worker built from one client; that is why one process needs three clients to host all + * three samples. */ public final class DataConverterConstants { @@ -44,10 +44,10 @@ private DataConverterConstants() {} public static final String TASK_LIST_S3 = "data-s3"; /** Registered workflow type for {@code CompressedDataConverterWorkflow}. */ - public static final String COMPRESSION_WORKFLOW_TYPE = "CompressionDataConverterWorkflow"; + public static final String COMPRESSION_WORKFLOW_TYPE = "CompressedDataConverterWorkflow"; /** Registered workflow type for {@code EncryptedDataConverterWorkflow}. */ - public static final String ENCRYPTION_WORKFLOW_TYPE = "EncryptionDataConverterWorkflow"; + public static final String ENCRYPTION_WORKFLOW_TYPE = "EncryptedDataConverterWorkflow"; /** Registered workflow type for {@code S3OffloadDataConverterWorkflow}. */ public static final String S3_OFFLOAD_WORKFLOW_TYPE = "S3OffloadDataConverterWorkflow"; diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java index b527cb03..a0965bff 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java @@ -24,9 +24,9 @@ import com.uber.cadence.worker.WorkerFactory; /** - * Hosts all three DataConverter sample workers in a single process. Each sample uses its own - * {@link WorkflowClient} (and therefore its own {@link WorkerFactory}) because the - * {@code DataConverter} is bound to {@code WorkflowClientOptions}. + * Hosts all three DataConverter sample workers in a single process. 
Each sample uses its own {@link + * WorkflowClient} (and therefore its own {@link WorkerFactory}) because the {@code DataConverter} + * is bound to {@code WorkflowClientOptions}. * *

On startup the worker prints a stats banner per sample showing the visible benefit of each * pattern (compression ratio, ciphertext preview, claim-check size), then begins polling all three @@ -38,7 +38,8 @@ private DataConverterWorker() {} public static void main(String[] args) { DataConverter compressionConverter = new CompressedJsonDataConverter(); - DataConverter encryptionConverter = new EncryptedJsonDataConverter(EncryptionKeyLoader.loadEncryptionKey()); + DataConverter encryptionConverter = + new EncryptedJsonDataConverter(EncryptionKeyLoader.loadEncryptionKey()); LocalFsBlobStore blobStore = new LocalFsBlobStore(); DataConverter s3Converter = new S3OffloadDataConverter( @@ -119,8 +120,10 @@ private static void printCompressionStats(DataConverter converter) { System.out.println(); System.out.println("=== Compression Sample Statistics ==="); - System.out.printf("Original JSON size: %d bytes (%.2f KB)%n", originalSize, originalSize / 1024.0); - System.out.printf("Compressed size: %d bytes (%.2f KB)%n", compressedSize, compressedSize / 1024.0); + System.out.printf( + "Original JSON size: %d bytes (%.2f KB)%n", originalSize, originalSize / 1024.0); + System.out.printf( + "Compressed size: %d bytes (%.2f KB)%n", compressedSize, compressedSize / 1024.0); System.out.printf("Compression ratio: %.2f%% reduction%n", pct); System.out.printf( "Space saved: %d bytes (%.2f KB)%n", @@ -147,7 +150,7 @@ private static void printEncryptionStats(DataConverter converter) { System.out.println("=== Encryption Sample Statistics ==="); System.out.printf("Plaintext JSON size: %d bytes%n", plaintextSize); System.out.printf( - "Ciphertext size: %d bytes (overhead: %d bytes nonce+tag)%n", + "Encrypted payload: %d bytes (growth: %d bytes vs plaintext JSON)%n", ciphertextSize, ciphertextSize - plaintextSize); System.out.printf("Ciphertext preview: %s%n", preview); System.out.printf( @@ -164,11 +167,11 @@ private static void printS3OffloadStats(LocalFsBlobStore store) { 
S3OffloadDataConverterWorkflow.createS3LargePayload(); byte[] jsonBytes = JsonDataConverter.getInstance().toData(payload); int jsonSize = jsonBytes == null ? 0 : jsonBytes.length; - // History footprint = 1 prefix byte + JSON envelope {"__s3_ref":"/"}. + // History footprint = 1 prefix byte + JSON envelope {"s3Ref":"/"}. // SHA-256 hex digest is 64 chars; bucket + "/" + 64 hex chars. int cadenceBytes = 1 - + ("{\"__s3_ref\":\"" + + ("{\"s3Ref\":\"" + DataConverterConstants.S3_BUCKET + "/" + repeatChar('a', 64) @@ -177,8 +180,10 @@ private static void printS3OffloadStats(LocalFsBlobStore store) { System.out.println(); System.out.println("=== S3 Offload Sample Statistics ==="); - System.out.printf("Full payload JSON size: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); - System.out.printf("Stored in BlobStore: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); + System.out.printf( + "Full payload JSON size: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); + System.out.printf( + "Stored in BlobStore: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); System.out.printf( "Stored in Cadence history: %d bytes (claim-check reference only)%n", cadenceBytes); System.out.printf( diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java index 47bb0257..cac5bb9c 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java @@ -25,14 +25,14 @@ /** * Starts {@link EncryptedDataConverterWorkflow} (async, fire-and-forget). * - *

The workflow takes no inputs and generates its own payload, so this starter does not need - * the encryption key — the worker owns the key. The same effect can be achieved from the Cadence - * CLI via: + *

The workflow takes no inputs and generates its own payload, so this starter does not need the + * encryption key — the worker owns the key. The same effect can be achieved from the Cadence CLI + * via: * *

  * cadence --domain samples-domain \
  *   workflow start \
- *   --workflow_type EncryptionDataConverterWorkflow \
+ *   --workflow_type EncryptedDataConverterWorkflow \
  *   --tl data-encryption \
  *   --et 60
  * 
From 2bccda4450aba40dba283982a37f80227ccfa6d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Mon, 11 May 2026 10:54:23 -0700 Subject: [PATCH 04/11] docs(dataconverter): clarify security guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- README.md | 2 +- .../EncryptedDataConverterWorkflow.java | 12 ++++++---- .../EncryptedJsonDataConverter.java | 17 ++++++------- .../cadence/samples/dataconverter/README.md | 24 +++++++++---------- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 2ae81666..d0229489 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ These samples demonstrate various capabilities of Java Cadence client and server * **Custom Workflow Controls** ([`com.uber.cadence.samples.query`](src/main/java/com/uber/cadence/samples/query/)) — workflow queries that return **markdown** for Cadence Web (Markdoc buttons that **signal** workflows or **start** new workflows). **Requires Cadence Web v4.0.14+.** Copy-paste run instructions: [query samples README](src/main/java/com/uber/cadence/samples/query/README.md). -* **DataConverter Samples** ([`com.uber.cadence.samples.dataconverter`](src/main/java/com/uber/cadence/samples/dataconverter/)) — three production-ready custom `DataConverter` patterns (gzip compression, AES-256-GCM encryption, and S3 / claim-check offload) that transparently transform every workflow input, output, and activity parameter. Copy-paste run instructions: [dataconverter samples README](src/main/java/com/uber/cadence/samples/dataconverter/README.md). +* **DataConverter Samples** ([`com.uber.cadence.samples.dataconverter`](src/main/java/com/uber/cadence/samples/dataconverter/)) — three custom `DataConverter` patterns (gzip compression, AES-256-GCM encryption, and BlobStore / S3 claim-check offload) that transparently transform every workflow input, output, and activity parameter. 
Copy-paste run instructions: [dataconverter samples README](src/main/java/com/uber/cadence/samples/dataconverter/README.md). ## Get the Samples diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java index 142e3865..e9502a80 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java @@ -25,8 +25,9 @@ /** * Demonstrates AES-256-GCM encryption as a Cadence {@code DataConverter}. Every workflow input, - * output, and activity parameter is encrypted before being written to Cadence history. Without - * the key, the data is opaque to anyone browsing workflow history — including Cadence operators. + * output, and activity parameter is encrypted before being written to Cadence history. Without the + * key, payloads in workflow history are unreadable to anyone browsing history — including Cadence + * operators. Application logs, metrics, and search attributes are not encrypted by a DataConverter. * *

The workflow takes no inputs and builds its own sensitive payload internally so it can be * started from the Cadence CLI without bundling the encryption key into the caller. @@ -78,9 +79,10 @@ public static SensitiveCustomerRecord createSensitiveCustomerRecord() { public interface WorkflowIface { @WorkflowMethod( - name = DataConverterConstants.ENCRYPTION_WORKFLOW_TYPE, - executionStartToCloseTimeoutSeconds = 60, - taskList = DataConverterConstants.TASK_LIST_ENCRYPTION) + name = DataConverterConstants.ENCRYPTION_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 60, + taskList = DataConverterConstants.TASK_LIST_ENCRYPTION + ) SensitiveCustomerRecord run(); } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java index d023426c..20c7d803 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java @@ -32,13 +32,14 @@ * AES-256-GCM. * *

Every workflow input, output, and activity parameter is encrypted before being written to - * Cadence history. Without the key, the data stored by the Cadence server — including any operator - * browsing workflow history — is completely opaque. + * Cadence history. Without the key, payloads stored by the Cadence server are unreadable to + * operators browsing workflow history. Logs, metrics, and search attributes are separate disclosure + * surfaces and must be handled separately. * - *

Output layout: {@code nonce(12 bytes) || ciphertext+tag(16 bytes)}. The random nonce means - * the same plaintext produces different ciphertext on every call, preventing replay detection by - * an attacker who observes Cadence history. The GCM authentication tag ensures any ciphertext - * tampering is detected at decode time. + *

Output layout: {@code nonce(12 bytes) || ciphertext || tag(16 bytes)}. The random nonce means + * the same plaintext produces different ciphertext on every call, which preserves semantic security + * for repeated payloads. The GCM authentication tag ensures any ciphertext tampering is detected at + * decode time. */ public final class EncryptedJsonDataConverter implements DataConverter { @@ -51,8 +52,8 @@ public final class EncryptedJsonDataConverter implements DataConverter { private final SecureRandom random = new SecureRandom(); /** - * @param keyBytes 32-byte AES-256 key. The caller is responsible for sourcing this from a - * secrets manager in production; see {@link EncryptionKeyLoader}. + * @param keyBytes 32-byte AES-256 key. The caller is responsible for sourcing this from a secrets + * manager in production; see {@link EncryptionKeyLoader}. * @throws IllegalArgumentException if the key is not 32 bytes. */ public EncryptedJsonDataConverter(byte[] keyBytes) { diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/README.md b/src/main/java/com/uber/cadence/samples/dataconverter/README.md index 1492847a..8c9691ac 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/README.md +++ b/src/main/java/com/uber/cadence/samples/dataconverter/README.md @@ -1,6 +1,6 @@ # DataConverter Samples -Three production-ready patterns for custom `DataConverter` implementations in the Cadence Java client: **compression**, **encryption**, and **S3 / claim-check offload**. A `DataConverter` controls how every workflow input, output, and activity parameter is serialized before it is written to Cadence history — making it the right place to add compression, encryption, or external offloading without changing any workflow or activity code. +Three practical patterns for custom `DataConverter` implementations in the Cadence Java client: **compression**, **encryption**, and **BlobStore / S3 claim-check offload**. 
A `DataConverter` controls how every workflow input, output, and activity parameter is serialized before it is written to Cadence history — making it the right place to add compression, encryption, or external offloading without changing any workflow or activity code. ## What is a DataConverter? @@ -60,7 +60,7 @@ Run **one** of the starters per sample run. Each starts a new workflow execution ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.EncryptionStarter ``` -**S3 offload** — claim-check pattern: +**S3 offload** — claim-check pattern with a zero-config local `BlobStore`: ```bash ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.S3OffloadStarter @@ -72,21 +72,21 @@ You can also start any of the three from the Cadence CLI; the commands are print ## Compression Sample -`CompressedDataConverterWorkflow` demonstrates gzip-over-JSON compression. For repetitive JSON data this typically achieves 60–80% size reduction, lowering storage cost and bandwidth for large workflow payloads. The converter is implemented in [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) — it wraps `JsonDataConverter.getInstance()` and post-processes the resulting bytes through `java.util.zip.GZIP*Stream`. +`CompressedDataConverterWorkflow` demonstrates gzip-over-JSON compression. For repetitive JSON data this typically achieves 60–80% size reduction, lowering storage cost and bandwidth for large workflow payloads. The converter is implemented in [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) — it wraps `JsonDataConverter.getInstance()`, post-processes the resulting bytes through `java.util.zip.GZIP*Stream`, and caps decompressed output to avoid unbounded memory growth on malformed input. 
- **Task list:** `data-compression` -- **Workflow type:** `CompressionDataConverterWorkflow` +- **Workflow type:** `CompressedDataConverterWorkflow` --- ## Encryption Sample -`EncryptedDataConverterWorkflow` demonstrates AES-256-GCM encryption. Every workflow input, output, and activity parameter is encrypted before being written to Cadence history. Without the key, the data stored by the Cadence server — including any operators browsing workflow history — is completely opaque. +`EncryptedDataConverterWorkflow` demonstrates AES-256-GCM encryption. Every workflow input, output, and activity parameter is encrypted before being written to Cadence history. Without the key, payloads stored by the Cadence server are unreadable to operators browsing workflow history. Logs, metrics, search attributes, and application output are separate disclosure surfaces. The sample uses a `SensitiveCustomerRecord` containing realistic PII and PHI fields (name, email, SSN, credit card, medical notes) to make the use case concrete. - **Task list:** `data-encryption` -- **Workflow type:** `EncryptionDataConverterWorkflow` +- **Workflow type:** `EncryptedDataConverterWorkflow` ### Encryption key @@ -101,23 +101,23 @@ export CADENCE_ENCRYPTION_KEY=$(openssl rand -hex 32) ### How AES-256-GCM works -- `toData`: JSON-encode arguments → generate a 12-byte random nonce → `Cipher.doFinal` with `AES/GCM/NoPadding` → return `nonce || ciphertext+tag`. +- `toData`: JSON-encode arguments → generate a 12-byte random nonce → `Cipher.doFinal` with `AES/GCM/NoPadding` → return `nonce || ciphertext || tag`. - `fromData` / `fromDataArray`: split nonce from input → `Cipher.doFinal` (decrypt) → JSON-decode. -The GCM authentication tag (16 bytes) ensures any ciphertext tampering is detected. The random nonce means the same plaintext produces different ciphertext on every call, preventing replay detection by an attacker observing Cadence history. 
+The GCM authentication tag (16 bytes) ensures any ciphertext tampering is detected. The random nonce means the same plaintext produces different ciphertext on every call, which preserves semantic security for repeated payloads. --- ## S3 Offload Sample (claim-check pattern) -`S3OffloadDataConverterWorkflow` demonstrates the *claim-check* pattern: payloads larger than a configurable threshold are stored in an external [`BlobStore`](BlobStore.java) and only a small reference (a few dozen bytes) travels through Cadence workflow history. This solves Cadence's per-payload size limits (~2 MB) for workflows that pass very large datasets between the workflow and its activities. +`S3OffloadDataConverterWorkflow` demonstrates the *claim-check* pattern: payloads larger than a configurable threshold are stored in an external [`BlobStore`](BlobStore.java) and only a small reference (a few dozen bytes) travels through Cadence workflow history. The runnable sample uses [`LocalFsBlobStore`](LocalFsBlobStore.java) so it works without cloud credentials; the same abstraction can be backed by S3 in production. This solves Cadence's per-payload size limits (~2 MB) for workflows that pass very large datasets between the workflow and its activities. - **Task list:** `data-s3` - **Workflow type:** `S3OffloadDataConverterWorkflow` ### How it works -- `toData`: JSON-encode → if `len(json) > thresholdBytes`, upload to `BlobStore` under a SHA-256 key and return `0x01 || {"__s3_ref":"/"}`. Otherwise return `0x00 || json` inline. +- `toData`: JSON-encode → if `len(json) > thresholdBytes`, upload to `BlobStore` under a SHA-256 key and return `0x01 || {"s3Ref":"/"}`. Otherwise return `0x00 || json` inline. - `fromData` / `fromDataArray`: read prefix byte → if `0x01`, fetch from `BlobStore` and decode; if `0x00`, decode inline. SHA-256-of-payload is used as the key so `toData` is idempotent across Cadence workflow replays. 
Using a fresh UUID per call would write a new orphaned blob on every replay. @@ -150,9 +150,9 @@ You can also point the SDK at [LocalStack](https://localstack.cloud/) or [MinIO] |---------|----------| | **Compression** | Large repetitive JSON payloads; reducing storage cost without confidentiality requirements | | **Encryption** | PII, PHI, secrets, or any data that must be unreadable in Cadence history | -| **S3 Offload** | Payloads approaching Cadence's size limits; binary or non-JSON data; cost-conscious archival | +| **BlobStore / S3 Offload** | Payloads approaching Cadence's size limits; binary or non-JSON data; cost-conscious archival | -Patterns can be composed: encrypt-then-compress, or encrypt-then-offload to S3 for maximum security and minimum history size. +Patterns can be composed, but order matters. Compress before encrypting when size reduction is a goal; encrypt before offloading when the external store should only receive ciphertext. ## Source layout From 3bcb571eb2d716e1ed173d7126769b5583b8f9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Mon, 11 May 2026 10:55:05 -0700 Subject: [PATCH 05/11] style(dataconverter): apply java formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- .../CompressedDataConverterWorkflow.java | 36 ++++++++++--------- .../dataconverter/DataConverterSupport.java | 3 +- .../dataconverter/EncryptionKeyLoader.java | 7 ++-- .../S3OffloadDataConverterWorkflow.java | 17 +++++---- .../dataconverter/S3OffloadStarter.java | 4 +-- 5 files changed, 38 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java index 5e699581..14293978 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java +++ 
b/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java @@ -33,8 +33,8 @@ * input, output, and activity parameter by {@link CompressedJsonDataConverter}, which is wired in * at the worker by {@link DataConverterWorker}. * - *

The workflow takes no inputs and builds its own large payload internally so it can be - * started from the Cadence CLI without bundling a custom converter into the caller. + *

The workflow takes no inputs and builds its own large payload internally so it can be started + * from the Cadence CLI without bundling a custom converter into the caller. */ public final class CompressedDataConverterWorkflow { @@ -44,8 +44,8 @@ private CompressedDataConverterWorkflow() {} /** * A complex data structure with nested objects and arrays designed to demonstrate compression - * benefits. Fields are public + have no-arg constructors so the JSON data converter can - * serialize and deserialize them. + * benefits. Fields are public + have no-arg constructors so the JSON data converter can serialize + * and deserialize them. */ public static final class LargePayload { public String id; @@ -149,9 +149,10 @@ public static LargePayload createLargePayload() { LargePayload p = new LargePayload(); p.id = "large_payload_001"; p.name = "Comprehensive Product Catalog"; - p.description = repeat( - "This is a comprehensive product catalog containing thousands of items with detailed descriptions, specifications, and user reviews. Each item includes pricing information, inventory status, and customer feedback. The catalog is designed to provide complete information for customers making purchasing decisions. ", - 50); + p.description = + repeat( + "This is a comprehensive product catalog containing thousands of items with detailed descriptions, specifications, and user reviews. Each item includes pricing information, inventory status, and customer feedback. The catalog is designed to provide complete information for customers making purchasing decisions. ", + 50); p.metadata = new LinkedHashMap<>(); for (int i = 0; i < 30; i++) { @@ -167,9 +168,10 @@ public static LargePayload createLargePayload() { Item it = new Item(); it.itemId = "item_" + i; it.title = "High-Quality Product " + i + " with Advanced Features"; - it.description = repeat( - "This is a premium product with exceptional quality and advanced features designed for professional use. 
It includes comprehensive documentation and support. ", - 10); + it.description = + repeat( + "This is a premium product with exceptional quality and advanced features designed for professional use. It includes comprehensive documentation and support. ", + 10); it.price = 100.0 + i * 10 + (i % 100) / 100.0; it.categories = new ArrayList<>(); it.categories.add("Electronics"); @@ -192,9 +194,10 @@ public static LargePayload createLargePayload() { r.reviewId = "review_" + i + "_" + j; r.userId = "user_" + j; r.rating = 1 + (j % 5); - r.comment = repeat( - "This is a detailed customer review with comprehensive feedback about the product quality, delivery experience, and overall satisfaction. The customer provides specific details about their experience. ", - 3); + r.comment = + repeat( + "This is a detailed customer review with comprehensive feedback about the product quality, delivery experience, and overall satisfaction. The customer provides specific details about their experience. ", + 3); r.helpfulVotes = j * 2; r.notHelpfulVotes = j; r.date = "2024-01-15T10:30:00Z"; @@ -290,9 +293,10 @@ private static String repeat(String s, int n) { public interface WorkflowIface { @WorkflowMethod( - name = DataConverterConstants.COMPRESSION_WORKFLOW_TYPE, - executionStartToCloseTimeoutSeconds = 60, - taskList = DataConverterConstants.TASK_LIST_COMPRESSION) + name = DataConverterConstants.COMPRESSION_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 60, + taskList = DataConverterConstants.TASK_LIST_COMPRESSION + ) LargePayload run(); } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java index 1f072fe4..5882b7bc 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java @@ -69,7 +69,8 @@ static boolean printHintIfDomainMissing(Throwable t) 
{ " ./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain"); System.err.println(); System.err.println("Or with Cadence CLI:"); - System.err.println(" cadence --domain " + DataConverterConstants.DOMAIN + " domain register"); + System.err.println( + " cadence --domain " + DataConverterConstants.DOMAIN + " domain register"); System.err.println(); return true; } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java index 720de46d..f82b695d 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java @@ -24,8 +24,8 @@ * *

Reads the key from the {@code CADENCE_ENCRYPTION_KEY} environment variable as 64 hex * characters (32 bytes). If the env var is unset, falls back to a hardcoded demo key with a - * warning. If the env var is set but invalid, throws — silently falling back to the public demo - * key when the user clearly intended their own key would be a security hole. + * warning. If the env var is set but invalid, throws — silently falling back to the public demo key + * when the user clearly intended their own key would be a security hole. */ public final class EncryptionKeyLoader { @@ -51,7 +51,8 @@ public static byte[] loadEncryptionKey() { try { key = hexDecode(hexKey); } catch (IllegalArgumentException e) { - throw new IllegalStateException("CADENCE_ENCRYPTION_KEY is not valid hex: " + e.getMessage(), e); + throw new IllegalStateException( + "CADENCE_ENCRYPTION_KEY is not valid hex: " + e.getMessage(), e); } if (key.length != 32) { throw new IllegalStateException( diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java index 215fd320..b1c8f2be 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java @@ -28,8 +28,8 @@ import java.util.Map; /** - * Demonstrates the claim-check pattern: payloads larger than the configured threshold are stored - * in an external {@link BlobStore} and only a small reference travels through Cadence history. + * Demonstrates the claim-check pattern: payloads larger than the configured threshold are stored in + * an external {@link BlobStore} and only a small reference travels through Cadence history. * *

The workflow takes no inputs and builds a payload well above the threshold internally so it * can be started from the Cadence CLI and every run exercises the offload path. @@ -66,8 +66,10 @@ public S3DataPoint() {} public static S3LargePayload createS3LargePayload() { S3LargePayload p = new S3LargePayload(); p.jobId = "batch-job-20240115-001"; - p.description = repeat( - "Large telemetry batch job containing sensor readings from the production cluster. ", 10); + p.description = + repeat( + "Large telemetry batch job containing sensor readings from the production cluster. ", + 10); p.dataPoints = new ArrayList<>(200); for (int i = 0; i < 200; i++) { @@ -100,9 +102,10 @@ private static String repeat(String s, int n) { public interface WorkflowIface { @WorkflowMethod( - name = DataConverterConstants.S3_OFFLOAD_WORKFLOW_TYPE, - executionStartToCloseTimeoutSeconds = 60, - taskList = DataConverterConstants.TASK_LIST_S3) + name = DataConverterConstants.S3_OFFLOAD_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 60, + taskList = DataConverterConstants.TASK_LIST_S3 + ) S3LargePayload run(); } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java index a8535812..e27da8a6 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java +++ b/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java @@ -26,8 +26,8 @@ * Starts {@link S3OffloadDataConverterWorkflow} (async, fire-and-forget). * *

The workflow takes no inputs and generates its own payload, so this starter does not need to - * use the matching {@link S3OffloadDataConverter}. The same effect can be achieved from the - * Cadence CLI via: + * use the matching {@link S3OffloadDataConverter}. The same effect can be achieved from the Cadence + * CLI via: * *

  * cadence --domain samples-domain \

From dfc0e81eebd934ef7953693c302ec8e21d41161b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= 
Date: Mon, 11 May 2026 10:55:50 -0700
Subject: [PATCH 06/11] test(dataconverter): cover converter sample behavior
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: “Kevin” 
---
 .../DataConverterSamplesTest.java             | 266 ++++++++++++++++++
 1 file changed, 266 insertions(+)
 create mode 100644 src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java

diff --git a/src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java b/src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java
new file mode 100644
index 00000000..77685913
--- /dev/null
+++ b/src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java
@@ -0,0 +1,266 @@
+/*
+ *  Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ *  Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ *  use this file except in compliance with the License. A copy of the License is
+ *  located at
+ *
+ *  http://aws.amazon.com/apache2.0
+ *
+ *  or in the "license" file accompanying this file. This file is distributed on
+ *  an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ *  express or implied. See the License for the specific language governing
+ *  permissions and limitations under the License.
+ */
+
+package com.uber.cadence.samples.dataconverter;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.uber.cadence.client.WorkflowClient;
+import com.uber.cadence.client.WorkflowClientOptions;
+import com.uber.cadence.client.WorkflowOptions;
+import com.uber.cadence.converter.DataConverterException;
+import com.uber.cadence.testing.TestEnvironmentOptions;
+import com.uber.cadence.testing.TestWorkflowEnvironment;
+import com.uber.cadence.worker.Worker;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+import org.junit.After;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class DataConverterSamplesTest {
+
+  @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+  private TestWorkflowEnvironment testEnv;
+
+  @After
+  public void tearDown() {
+    if (testEnv != null) {
+      testEnv.close();
+    }
+  }
+
+  @Test
+  public void testCompressedConverterRoundTrip() {
+    CompressedJsonDataConverter converter = new CompressedJsonDataConverter();
+    CompressedDataConverterWorkflow.LargePayload payload =
+        CompressedDataConverterWorkflow.createLargePayload();
+
+    byte[] encoded = converter.toData(payload);
+    CompressedDataConverterWorkflow.LargePayload decoded =
+        converter.fromData(
+            encoded,
+            CompressedDataConverterWorkflow.LargePayload.class,
+            CompressedDataConverterWorkflow.LargePayload.class);
+
+    assertEquals(payload.id, decoded.id);
+    assertEquals(payload.name, decoded.name);
+    assertEquals(payload.items.size(), decoded.items.size());
+    assertEquals(payload.history.size(), decoded.history.size());
+  }
+
+  @Test
+  public void testCompressedConverterRejectsMalformedPayload() {
+    CompressedJsonDataConverter converter = new CompressedJsonDataConverter();
+
+    try {
+      converter.fromData(new byte[] {1, 2, 3}, String.class, String.class);
+      fail("expected malformed gzip payload to fail");
+    } catch (DataConverterException e) {
+      assertTrue(e.getMessage().contains("gunzip"));
+    }
+  }
+
+  @Test
+  public void testCompressedConverterRejectsPayloadAboveLimit() {
+    CompressedJsonDataConverter encoder = new CompressedJsonDataConverter();
+    CompressedJsonDataConverter decoder = new CompressedJsonDataConverter(8);
+    byte[] encoded = encoder.toData("this string inflates beyond the configured limit");
+
+    try {
+      decoder.fromData(encoded, String.class, String.class);
+      fail("expected oversized decompressed payload to fail");
+    } catch (DataConverterException e) {
+      assertTrue(e.getMessage().contains("maximum size"));
+    }
+  }
+
+  @Test
+  public void testEncryptedConverterRoundTripAndRandomNonce() {
+    EncryptedJsonDataConverter converter =
+        new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY);
+    EncryptedDataConverterWorkflow.SensitiveCustomerRecord record =
+        EncryptedDataConverterWorkflow.createSensitiveCustomerRecord();
+
+    byte[] first = converter.toData(record);
+    byte[] second = converter.toData(record);
+
+    assertFalse(Arrays.equals(first, second));
+    EncryptedDataConverterWorkflow.SensitiveCustomerRecord decoded =
+        converter.fromData(
+            first,
+            EncryptedDataConverterWorkflow.SensitiveCustomerRecord.class,
+            EncryptedDataConverterWorkflow.SensitiveCustomerRecord.class);
+    assertEquals(record.customerId, decoded.customerId);
+    assertEquals(record.ssn, decoded.ssn);
+    assertEquals(record.medicalNotes, decoded.medicalNotes);
+  }
+
+  @Test
+  public void testEncryptedConverterRejectsShortCiphertext() {
+    EncryptedJsonDataConverter converter =
+        new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY);
+
+    try {
+      converter.fromData(new byte[] {1, 2, 3}, String.class, String.class);
+      fail("expected short ciphertext to fail");
+    } catch (DataConverterException e) {
+      assertTrue(e.getMessage().contains("Ciphertext too short"));
+    }
+  }
+
+  @Test
+  public void testEncryptedConverterWorksInWorkflowEnvironment() {
+    EncryptedJsonDataConverter converter =
+        new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY);
+    TestEnvironmentOptions options =
+        new TestEnvironmentOptions.Builder()
+            .setWorkflowClientOptions(
+                WorkflowClientOptions.newBuilder().setDataConverter(converter).build())
+            .build();
+    testEnv = TestWorkflowEnvironment.newInstance(options);
+    Worker worker = testEnv.newWorker(DataConverterConstants.TASK_LIST_ENCRYPTION);
+    worker.registerWorkflowImplementationTypes(EncryptedDataConverterWorkflow.WorkflowImpl.class);
+    worker.registerActivitiesImplementations(new EncryptedDataConverterWorkflow.ActivitiesImpl());
+    testEnv.start();
+
+    WorkflowClient workflowClient =
+        testEnv.newWorkflowClient(
+            WorkflowClientOptions.newBuilder().setDataConverter(converter).build());
+    WorkflowOptions workflowOptions =
+        new WorkflowOptions.Builder()
+            .setTaskList(DataConverterConstants.TASK_LIST_ENCRYPTION)
+            .setExecutionStartToCloseTimeout(Duration.ofMinutes(1))
+            .build();
+    EncryptedDataConverterWorkflow.WorkflowIface workflow =
+        workflowClient.newWorkflowStub(
+            EncryptedDataConverterWorkflow.WorkflowIface.class, workflowOptions);
+
+    EncryptedDataConverterWorkflow.SensitiveCustomerRecord result = workflow.run();
+
+    assertEquals("cust_8a7f3b2e", result.customerId);
+    assertEquals("workflow-processor-v2 (Encrypted)", result.processedBy);
+  }
+
+  @Test
+  public void testS3OffloadConverterInlinesBelowThreshold() {
+    RecordingBlobStore store = new RecordingBlobStore();
+    S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1024);
+
+    byte[] encoded = converter.toData("small");
+    String decoded = converter.fromData(encoded, String.class, String.class);
+
+    assertEquals(S3OffloadDataConverter.INLINE_PREFIX, encoded[0]);
+    assertEquals("small", decoded);
+    assertTrue(store.blobs.isEmpty());
+  }
+
+  @Test
+  public void testS3OffloadConverterOffloadsAndUsesIdempotentReference() {
+    RecordingBlobStore store = new RecordingBlobStore();
+    S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1);
+
+    byte[] first = converter.toData("large enough to offload");
+    byte[] second = converter.toData("large enough to offload");
+    String decoded = converter.fromData(first, String.class, String.class);
+
+    assertEquals(S3OffloadDataConverter.OFFLOAD_PREFIX, first[0]);
+    assertArrayEquals(first, second);
+    assertEquals("large enough to offload", decoded);
+    assertEquals(1, store.blobs.size());
+  }
+
+  @Test
+  public void testS3OffloadConverterRejectsUnknownPrefix() {
+    S3OffloadDataConverter converter =
+        new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", 1);
+
+    try {
+      converter.fromData(new byte[] {0x7f}, String.class, String.class);
+      fail("expected unknown prefix to fail");
+    } catch (DataConverterException e) {
+      assertTrue(e.getMessage().contains("unknown prefix"));
+    }
+  }
+
+  @Test
+  public void testS3OffloadConverterValidatesConstructorInputs() {
+    expectIllegalArgument(() -> new S3OffloadDataConverter(null, "bucket", 1));
+    expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), " ", 1));
+    expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", -1));
+  }
+
+  @Test
+  public void testLocalFsBlobStoreHashesUnsafeKeys() throws Exception {
+    Path baseDir = temporaryFolder.newFolder("blobs").toPath();
+    LocalFsBlobStore store = new LocalFsBlobStore(baseDir);
+    byte[] data = new byte[] {1, 2, 3};
+
+    store.put("../escape", data);
+    store.put(".", data);
+    store.put("bucket\\nested/key", data);
+
+    assertArrayEquals(data, store.get("../escape"));
+    assertArrayEquals(data, store.get("."));
+    assertArrayEquals(data, store.get("bucket\\nested/key"));
+    try (Stream<Path> files = Files.list(baseDir)) {
+      assertEquals(3, files.filter(Files::isRegularFile).count());
+    }
+    try (Stream<Path> files = Files.list(baseDir)) {
+      assertTrue(files.allMatch(path -> path.getFileName().toString().matches("[0-9a-f]{64}")));
+    }
+  }
+
+  private static void expectIllegalArgument(Runnable runnable) {
+    try {
+      runnable.run();
+      fail("expected IllegalArgumentException");
+    } catch (IllegalArgumentException expected) {
+      // Expected.
+    }
+  }
+
+  private static final class RecordingBlobStore implements BlobStore {
+    final Map<String, byte[]> blobs = new LinkedHashMap<>();
+
+    @Override
+    public void put(String key, byte[] data) {
+      blobs.put(key, data);
+    }
+
+    @Override
+    public byte[] get(String key) throws IOException {
+      byte[] data = blobs.get(key);
+      if (data == null) {
+        throw new IOException("missing key " + key);
+      }
+      return data;
+    }
+  }
+}

From 4030b93ced55a026343783bc041529a808e08d71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= 
Date: Fri, 15 May 2026 08:28:27 -0700
Subject: [PATCH 07/11] refactor(dataconverter): split samples into per-pattern
 packages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: “Kevin” 
---
 README.md                                     |  30 +-
 .../CompressedDataConverterWorkflow.java      |  16 +-
 .../CompressedJsonDataConverter.java          |   2 +-
 .../CompressionStarter.java                   |  49 +++-
 .../compression/CompressionWorker.java        |  95 +++++++
 .../cadence/samples/compression/README.md     |  61 ++++
 .../dataconverter/DataConverterConstants.java |  64 -----
 .../dataconverter/DataConverterSupport.java   |  80 ------
 .../dataconverter/DataConverterWorker.java    | 219 --------------
 .../cadence/samples/dataconverter/README.md   | 170 -----------
 .../EncryptedDataConverterWorkflow.java       |  14 +-
 .../EncryptedJsonDataConverter.java           |   2 +-
 .../EncryptionKeyLoader.java                  |   2 +-
 .../EncryptionStarter.java                    |  49 +++-
 .../samples/encryption/EncryptionWorker.java  | 106 +++++++
 .../uber/cadence/samples/encryption/README.md |  74 +++++
 .../BlobStore.java                            |   2 +-
 .../LocalFsBlobStore.java                     |   2 +-
 .../uber/cadence/samples/s3offload/README.md  |  79 ++++++
 .../S3OffloadDataConverter.java               |   4 +-
 .../S3OffloadDataConverterWorkflow.java       |  28 +-
 .../S3OffloadStarter.java                     |  49 +++-
 .../samples/s3offload/S3OffloadWorker.java    | 116 ++++++++
 .../CompressedJsonDataConverterTest.java      |  73 +++++
 .../DataConverterSamplesTest.java             | 266 ------------------
 .../EncryptedJsonDataConverterTest.java       | 114 ++++++++
 .../s3offload/S3OffloadDataConverterTest.java | 135 +++++++++
 27 files changed, 1057 insertions(+), 844 deletions(-)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => compression}/CompressedDataConverterWorkflow.java (95%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => compression}/CompressedJsonDataConverter.java (98%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => compression}/CompressionStarter.java (50%)
 create mode 100644 src/main/java/com/uber/cadence/samples/compression/CompressionWorker.java
 create mode 100644 src/main/java/com/uber/cadence/samples/compression/README.md
 delete mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java
 delete mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java
 delete mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java
 delete mode 100644 src/main/java/com/uber/cadence/samples/dataconverter/README.md
 rename src/main/java/com/uber/cadence/samples/{dataconverter => encryption}/EncryptedDataConverterWorkflow.java (92%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => encryption}/EncryptedJsonDataConverter.java (99%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => encryption}/EncryptionKeyLoader.java (98%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => encryption}/EncryptionStarter.java (50%)
 create mode 100644 src/main/java/com/uber/cadence/samples/encryption/EncryptionWorker.java
 create mode 100644 src/main/java/com/uber/cadence/samples/encryption/README.md
 rename src/main/java/com/uber/cadence/samples/{dataconverter => s3offload}/BlobStore.java (96%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => s3offload}/LocalFsBlobStore.java (98%)
 create mode 100644 src/main/java/com/uber/cadence/samples/s3offload/README.md
 rename src/main/java/com/uber/cadence/samples/{dataconverter => s3offload}/S3OffloadDataConverter.java (99%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => s3offload}/S3OffloadDataConverterWorkflow.java (83%)
 rename src/main/java/com/uber/cadence/samples/{dataconverter => s3offload}/S3OffloadStarter.java (50%)
 create mode 100644 src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java
 create mode 100644 src/test/java/com/uber/cadence/samples/compression/CompressedJsonDataConverterTest.java
 delete mode 100644 src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java
 create mode 100644 src/test/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverterTest.java
 create mode 100644 src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java

diff --git a/README.md b/README.md
index d0229489..28eb3d01 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,10 @@ These samples demonstrate various capabilities of Java Cadence client and server
 
 * **Custom Workflow Controls** ([`com.uber.cadence.samples.query`](src/main/java/com/uber/cadence/samples/query/)) — workflow queries that return **markdown** for Cadence Web (Markdoc buttons that **signal** workflows or **start** new workflows). **Requires Cadence Web v4.0.14+.** Copy-paste run instructions: [query samples README](src/main/java/com/uber/cadence/samples/query/README.md).
 
-* **DataConverter Samples** ([`com.uber.cadence.samples.dataconverter`](src/main/java/com/uber/cadence/samples/dataconverter/)) — three custom `DataConverter` patterns (gzip compression, AES-256-GCM encryption, and BlobStore / S3 claim-check offload) that transparently transform every workflow input, output, and activity parameter. Copy-paste run instructions: [dataconverter samples README](src/main/java/com/uber/cadence/samples/dataconverter/README.md).
+* **DataConverter Samples** — three independent custom `DataConverter` patterns that transparently transform every workflow input, output, and activity parameter. Each lives in its own package and is fully standalone, so you can copy any one of them into your own project:
+    * **Compression** ([`com.uber.cadence.samples.compression`](src/main/java/com/uber/cadence/samples/compression/)) — gzip-over-JSON; typically 60-80% size reduction for repetitive payloads. [README](src/main/java/com/uber/cadence/samples/compression/README.md).
+    * **Encryption** ([`com.uber.cadence.samples.encryption`](src/main/java/com/uber/cadence/samples/encryption/)) — AES-256-GCM so payloads in Cadence history are unreadable without the key. [README](src/main/java/com/uber/cadence/samples/encryption/README.md).
+    * **S3 / claim-check offload** ([`com.uber.cadence.samples.s3offload`](src/main/java/com/uber/cadence/samples/s3offload/)) — payloads above a threshold are stored in an external `BlobStore`; only a small reference travels through history. [README](src/main/java/com/uber/cadence/samples/s3offload/README.md).
 
 ## Get the Samples
 
@@ -143,17 +146,28 @@ In Cadence Web, open the workflow → **Query** tab → run query **`Signal`**,
 
 ### DataConverter Samples
 
-Three samples (compression, encryption, S3 offload) demonstrating custom `DataConverter` implementations. One worker hosts all three on three task lists. See [src/main/java/com/uber/cadence/samples/dataconverter/README.md](src/main/java/com/uber/cadence/samples/dataconverter/README.md) for full details, encryption-key configuration, and S3 swap instructions.
+Three independent samples demonstrating custom `DataConverter` implementations. Each sample is self-contained in its own package with its own worker, starter, task list, and README. Pick one to run, or run all three in parallel — they share nothing.
 
-Worker (hosts all three samples; prints per-sample stats banners on startup):
+#### Compression (gzip-over-JSON)
 
-    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.DataConverterWorker
+See [src/main/java/com/uber/cadence/samples/compression/README.md](src/main/java/com/uber/cadence/samples/compression/README.md).
 
-Starters (pick one per run; each starts a new workflow execution and exits):
+    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.compression.CompressionWorker
+    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.compression.CompressionStarter
 
-    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.CompressionStarter
-    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.EncryptionStarter
-    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.S3OffloadStarter
+#### Encryption (AES-256-GCM)
+
+See [src/main/java/com/uber/cadence/samples/encryption/README.md](src/main/java/com/uber/cadence/samples/encryption/README.md) for the `CADENCE_ENCRYPTION_KEY` env var.
+
+    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.encryption.EncryptionWorker
+    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.encryption.EncryptionStarter
+
+#### S3 / claim-check offload
+
+See [src/main/java/com/uber/cadence/samples/s3offload/README.md](src/main/java/com/uber/cadence/samples/s3offload/README.md) for the AWS SDK swap-in instructions.
+
+    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadWorker
+    ./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadStarter
 
 ### Trip Booking
 
diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/compression/CompressedDataConverterWorkflow.java
similarity index 95%
rename from src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java
rename to src/main/java/com/uber/cadence/samples/compression/CompressedDataConverterWorkflow.java
index 14293978..0e399428 100644
--- a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedDataConverterWorkflow.java
+++ b/src/main/java/com/uber/cadence/samples/compression/CompressedDataConverterWorkflow.java
@@ -15,7 +15,7 @@
  *  permissions and limitations under the License.
  */
 
-package com.uber.cadence.samples.dataconverter;
+package com.uber.cadence.samples.compression;
 
 import com.uber.cadence.activity.ActivityMethod;
 import com.uber.cadence.activity.ActivityOptions;
@@ -31,7 +31,7 @@
  * Demonstrates gzip-over-JSON compression as a Cadence {@code DataConverter}. The workflow itself
  * is unchanged from a plain Cadence workflow — the compression is applied transparently to every
  * input, output, and activity parameter by {@link CompressedJsonDataConverter}, which is wired in
- * at the worker by {@link DataConverterWorker}.
+ * at the worker by {@link CompressionWorker}.
  *
  * 

The workflow takes no inputs and builds its own large payload internally so it can be started * from the Cadence CLI without bundling a custom converter into the caller. @@ -40,6 +40,14 @@ public final class CompressedDataConverterWorkflow { private CompressedDataConverterWorkflow() {} + /** Task list polled by {@link CompressionWorker}. */ + public static final String TASK_LIST = "data-compression"; + + /** + * Registered workflow type, used for both {@code @WorkflowMethod} and CLI {@code workflow start}. + */ + public static final String WORKFLOW_TYPE = "CompressedDataConverterWorkflow"; + // ---------------- POJOs ---------------- /** @@ -293,9 +301,9 @@ private static String repeat(String s, int n) { public interface WorkflowIface { @WorkflowMethod( - name = DataConverterConstants.COMPRESSION_WORKFLOW_TYPE, + name = WORKFLOW_TYPE, executionStartToCloseTimeoutSeconds = 60, - taskList = DataConverterConstants.TASK_LIST_COMPRESSION + taskList = TASK_LIST ) LargePayload run(); } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java similarity index 98% rename from src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java rename to src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java index dcf600d6..4c734253 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/CompressedJsonDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. 
*/ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.compression; import com.uber.cadence.converter.DataConverter; import com.uber.cadence.converter.DataConverterException; diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java b/src/main/java/com/uber/cadence/samples/compression/CompressionStarter.java similarity index 50% rename from src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java rename to src/main/java/com/uber/cadence/samples/compression/CompressionStarter.java index f2bf0172..1c08e78d 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/CompressionStarter.java +++ b/src/main/java/com/uber/cadence/samples/compression/CompressionStarter.java @@ -15,10 +15,14 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.compression; import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; import com.uber.cadence.client.WorkflowOptions; +import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; +import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; +import com.uber.cadence.samples.common.SampleConstants; import java.time.Duration; import java.util.UUID; @@ -43,10 +47,13 @@ private CompressionStarter() {} public static void main(String[] args) { try { - WorkflowClient client = DataConverterSupport.newWorkflowClient(); + WorkflowClient client = + WorkflowClient.newInstance( + new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), + WorkflowClientOptions.newBuilder().setDomain(SampleConstants.DOMAIN).build()); WorkflowOptions options = new WorkflowOptions.Builder() - .setTaskList(DataConverterConstants.TASK_LIST_COMPRESSION) + .setTaskList(CompressedDataConverterWorkflow.TASK_LIST) .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) .setWorkflowId("compression-" + UUID.randomUUID()) 
.build(); @@ -56,15 +63,45 @@ public static void main(String[] args) { WorkflowClient.start(workflow::run); System.out.println( - "Started CompressedDataConverterWorkflow on task list \"" - + DataConverterConstants.TASK_LIST_COMPRESSION + "Started " + + CompressedDataConverterWorkflow.WORKFLOW_TYPE + + " on task list \"" + + CompressedDataConverterWorkflow.TASK_LIST + "\"."); System.exit(0); } catch (RuntimeException e) { - if (DataConverterSupport.printHintIfDomainMissing(e)) { + if (printHintIfDomainMissing(e)) { System.exit(1); } throw e; } } + + /** + * Prints a copy-paste hint when the Cadence error indicates the sample domain has not been + * registered. + * + * @return true if {@code t} was a missing-domain error and a hint was printed (caller should + * exit). + */ + static boolean printHintIfDomainMissing(Throwable t) { + for (Throwable c = t; c != null; c = c.getCause()) { + String m = c.getMessage(); + if (m != null && m.contains("Domain") && m.contains("does not exist")) { + System.err.println(); + System.err.println( + "Cadence reported that the domain \"" + SampleConstants.DOMAIN + "\" does not exist."); + System.err.println("Register it once against your cluster, then run this again:"); + System.err.println(); + System.err.println( + " ./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain"); + System.err.println(); + System.err.println("Or with Cadence CLI:"); + System.err.println(" cadence --domain " + SampleConstants.DOMAIN + " domain register"); + System.err.println(); + return true; + } + } + return false; + } } diff --git a/src/main/java/com/uber/cadence/samples/compression/CompressionWorker.java b/src/main/java/com/uber/cadence/samples/compression/CompressionWorker.java new file mode 100644 index 00000000..fa3f0d61 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/compression/CompressionWorker.java @@ -0,0 +1,95 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.compression; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.JsonDataConverter; +import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; +import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; +import com.uber.cadence.samples.common.SampleConstants; +import com.uber.cadence.worker.Worker; +import com.uber.cadence.worker.WorkerFactory; + +/** + * Hosts the gzip-compression sample worker. Constructs a {@link WorkflowClient} configured with + * {@link CompressedJsonDataConverter} so every workflow input, output, and activity parameter is + * transparently gzip-compressed in Cadence history. On startup it prints a stats banner showing the + * before/after size of the sample payload so the benefit is visible at a glance. 
+ */ +public final class CompressionWorker { + + private CompressionWorker() {} + + public static void main(String[] args) { + DataConverter converter = new CompressedJsonDataConverter(); + WorkflowClient client = + WorkflowClient.newInstance( + new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), + WorkflowClientOptions.newBuilder() + .setDomain(SampleConstants.DOMAIN) + .setDataConverter(converter) + .build()); + + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker(CompressedDataConverterWorkflow.TASK_LIST); + worker.registerWorkflowImplementationTypes(CompressedDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new CompressedDataConverterWorkflow.ActivitiesImpl()); + factory.start(); + + printCompressionStats(converter); + + System.out.println( + "CompressionWorker listening on \"" + + CompressedDataConverterWorkflow.TASK_LIST + + "\" (domain \"" + + SampleConstants.DOMAIN + + "\")."); + + Runtime.getRuntime().addShutdownHook(new Thread(factory::shutdown)); + } + + private static void printCompressionStats(DataConverter converter) { + CompressedDataConverterWorkflow.LargePayload payload = + CompressedDataConverterWorkflow.createLargePayload(); + byte[] originalJson = JsonDataConverter.getInstance().toData(payload); + byte[] compressed = converter.toData(payload); + int originalSize = originalJson == null ? 0 : originalJson.length; + int compressedSize = compressed == null ? 0 : compressed.length; + double pct = originalSize == 0 ? 
0.0 : (1.0 - (double) compressedSize / originalSize) * 100.0; + + System.out.println(); + System.out.println("=== Compression Sample Statistics ==="); + System.out.printf( + "Original JSON size: %d bytes (%.2f KB)%n", originalSize, originalSize / 1024.0); + System.out.printf( + "Compressed size: %d bytes (%.2f KB)%n", compressedSize, compressedSize / 1024.0); + System.out.printf("Compression ratio: %.2f%% reduction%n", pct); + System.out.printf( + "Space saved: %d bytes (%.2f KB)%n", + originalSize - compressedSize, (originalSize - compressedSize) / 1024.0); + System.out.printf( + "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", + SampleConstants.DOMAIN, + CompressedDataConverterWorkflow.TASK_LIST, + CompressedDataConverterWorkflow.WORKFLOW_TYPE); + System.out.println("====================================="); + System.out.println(); + } +} diff --git a/src/main/java/com/uber/cadence/samples/compression/README.md b/src/main/java/com/uber/cadence/samples/compression/README.md new file mode 100644 index 00000000..46fac32a --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/compression/README.md @@ -0,0 +1,61 @@ +# Compression DataConverter Sample + +A custom Cadence [`DataConverter`](../../../../../../../../README.md) that JSON-encodes workflow data and then gzip-compresses the bytes. For repetitive JSON payloads this typically achieves 60-80% size reduction, lowering storage cost and bandwidth without changing any workflow or activity code. The decode path caps decompressed payloads (default 10 MB) so a malformed input cannot drive unbounded memory growth. + +- **Task list:** `data-compression` +- **Workflow type:** `CompressedDataConverterWorkflow` + +## Prerequisites + +1. Cadence server running (e.g. Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). +2. From the repo root, build: `./gradlew build`. 
+ +### Register the domain (required once per cluster) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain +``` + +Or with the Cadence CLI: + +```bash +cadence --domain samples-domain domain register +``` + +## Run the worker (terminal 1) + +The worker prints a compression statistics banner showing the before/after sizes of the sample payload, then begins polling the `data-compression` task list: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.compression.CompressionWorker +``` + +## Start a workflow (terminal 2) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.compression.CompressionStarter +``` + +Or from the Cadence CLI: + +```bash +cadence --domain samples-domain \ + workflow start \ + --workflow_type CompressedDataConverterWorkflow \ + --tl data-compression \ + --et 60 +``` + +## How it works + +- `toData`: JSON-encode the arguments with the standard `JsonDataConverter`, then write the bytes through `java.util.zip.GZIPOutputStream`. +- `fromData` / `fromDataArray`: decompress through `GZIPInputStream` with a configurable max output cap, then delegate to the standard `JsonDataConverter`. 
+ +## Source layout + +| File | Purpose | +|------|---------| +| [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) | The custom `DataConverter` | +| [`CompressedDataConverterWorkflow.java`](CompressedDataConverterWorkflow.java) | Workflow + activity + sample `LargePayload` POJOs and generator | +| [`CompressionWorker.java`](CompressionWorker.java) | Worker main; wires the converter into `WorkflowClientOptions` and prints the stats banner | +| [`CompressionStarter.java`](CompressionStarter.java) | Thin async starter | diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java deleted file mode 100644 index 71c20b2b..00000000 --- a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterConstants.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not - * use this file except in compliance with the License. A copy of the License is - * located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -package com.uber.cadence.samples.dataconverter; - -import com.uber.cadence.samples.common.SampleConstants; - -/** - * Shared identifiers for the DataConverter samples. - * - *
<p>
Each of the three samples runs on its own task list so it can have its own {@code - * DataConverter}. {@code DataConverter} is bound to a {@code WorkflowClient}, and each task list - * maps to one worker built from one client; that is why one process needs three clients to host all - * three samples. - */ -public final class DataConverterConstants { - - private DataConverterConstants() {} - - /** Cadence domain shared with the rest of the samples (registered via {@code RegisterDomain}). */ - public static final String DOMAIN = SampleConstants.DOMAIN; - - /** Task list for the gzip-compression sample worker. */ - public static final String TASK_LIST_COMPRESSION = "data-compression"; - - /** Task list for the AES-256-GCM encryption sample worker. */ - public static final String TASK_LIST_ENCRYPTION = "data-encryption"; - - /** Task list for the S3 / claim-check offload sample worker. */ - public static final String TASK_LIST_S3 = "data-s3"; - - /** Registered workflow type for {@code CompressedDataConverterWorkflow}. */ - public static final String COMPRESSION_WORKFLOW_TYPE = "CompressedDataConverterWorkflow"; - - /** Registered workflow type for {@code EncryptedDataConverterWorkflow}. */ - public static final String ENCRYPTION_WORKFLOW_TYPE = "EncryptedDataConverterWorkflow"; - - /** Registered workflow type for {@code S3OffloadDataConverterWorkflow}. */ - public static final String S3_OFFLOAD_WORKFLOW_TYPE = "S3OffloadDataConverterWorkflow"; - - /** Logical bucket / prefix embedded in S3-offload reference keys. */ - public static final String S3_BUCKET = "data-s3"; - - /** - * Payloads larger than this are offloaded to the BlobStore by {@link S3OffloadDataConverter}. - * Cadence's default max payload is roughly 2 MB; the threshold is set intentionally low so the - * demo workflow comfortably triggers offloading. 
- */ - public static final int S3_DEFAULT_THRESHOLD_BYTES = 4096; -} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java deleted file mode 100644 index 5882b7bc..00000000 --- a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterSupport.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not - * use this file except in compliance with the License. A copy of the License is - * located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -package com.uber.cadence.samples.dataconverter; - -import com.uber.cadence.client.WorkflowClient; -import com.uber.cadence.client.WorkflowClientOptions; -import com.uber.cadence.converter.DataConverter; -import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; -import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; - -/** Shared client factory and friendly errors for the DataConverter sample starters and worker. */ -final class DataConverterSupport { - - private DataConverterSupport() {} - - /** - * Builds a WorkflowClient with the given DataConverter on the configured domain. The Worker - * derived from this client will use the same converter for all serialization. 
- */ - static WorkflowClient newWorkflowClient(DataConverter dataConverter) { - WorkflowClientOptions.Builder builder = - WorkflowClientOptions.newBuilder().setDomain(DataConverterConstants.DOMAIN); - if (dataConverter != null) { - builder.setDataConverter(dataConverter); - } - return WorkflowClient.newInstance( - new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), builder.build()); - } - - /** Builds a WorkflowClient using the default JSON DataConverter. */ - static WorkflowClient newWorkflowClient() { - return newWorkflowClient(null); - } - - /** - * Prints a copy-paste hint when the Cadence error indicates the sample domain has not been - * registered. - * - * @return true if {@code t} was a missing-domain error and a hint was printed (caller should - * exit). - */ - static boolean printHintIfDomainMissing(Throwable t) { - for (Throwable c = t; c != null; c = c.getCause()) { - String m = c.getMessage(); - if (m != null && m.contains("Domain") && m.contains("does not exist")) { - System.err.println(); - System.err.println( - "Cadence reported that the domain \"" - + DataConverterConstants.DOMAIN - + "\" does not exist."); - System.err.println("Register it once against your cluster, then run this again:"); - System.err.println(); - System.err.println( - " ./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain"); - System.err.println(); - System.err.println("Or with Cadence CLI:"); - System.err.println( - " cadence --domain " + DataConverterConstants.DOMAIN + " domain register"); - System.err.println(); - return true; - } - } - return false; - } -} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java b/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java deleted file mode 100644 index a0965bff..00000000 --- a/src/main/java/com/uber/cadence/samples/dataconverter/DataConverterWorker.java +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 2012-2016 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not - * use this file except in compliance with the License. A copy of the License is - * located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -package com.uber.cadence.samples.dataconverter; - -import com.uber.cadence.client.WorkflowClient; -import com.uber.cadence.converter.DataConverter; -import com.uber.cadence.converter.JsonDataConverter; -import com.uber.cadence.worker.Worker; -import com.uber.cadence.worker.WorkerFactory; - -/** - * Hosts all three DataConverter sample workers in a single process. Each sample uses its own {@link - * WorkflowClient} (and therefore its own {@link WorkerFactory}) because the {@code DataConverter} - * is bound to {@code WorkflowClientOptions}. - * - *
<p>
On startup the worker prints a stats banner per sample showing the visible benefit of each - * pattern (compression ratio, ciphertext preview, claim-check size), then begins polling all three - * task lists in the background. - */ -public final class DataConverterWorker { - - private DataConverterWorker() {} - - public static void main(String[] args) { - DataConverter compressionConverter = new CompressedJsonDataConverter(); - DataConverter encryptionConverter = - new EncryptedJsonDataConverter(EncryptionKeyLoader.loadEncryptionKey()); - LocalFsBlobStore blobStore = new LocalFsBlobStore(); - DataConverter s3Converter = - new S3OffloadDataConverter( - blobStore, - DataConverterConstants.S3_BUCKET, - DataConverterConstants.S3_DEFAULT_THRESHOLD_BYTES); - - WorkerFactory compressionFactory = startCompressionWorker(compressionConverter); - WorkerFactory encryptionFactory = startEncryptionWorker(encryptionConverter); - WorkerFactory s3Factory = startS3OffloadWorker(s3Converter); - - printCompressionStats(compressionConverter); - printEncryptionStats(encryptionConverter); - printS3OffloadStats(blobStore); - - System.out.println( - "DataConverterWorker listening on \"" - + DataConverterConstants.TASK_LIST_COMPRESSION - + "\", \"" - + DataConverterConstants.TASK_LIST_ENCRYPTION - + "\", \"" - + DataConverterConstants.TASK_LIST_S3 - + "\" (domain \"" - + DataConverterConstants.DOMAIN - + "\")."); - - // Keep references so the factories aren't GC'd while the process runs. 
- Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - compressionFactory.shutdown(); - encryptionFactory.shutdown(); - s3Factory.shutdown(); - })); - } - - private static WorkerFactory startCompressionWorker(DataConverter converter) { - WorkflowClient client = DataConverterSupport.newWorkflowClient(converter); - WorkerFactory factory = WorkerFactory.newInstance(client); - Worker worker = factory.newWorker(DataConverterConstants.TASK_LIST_COMPRESSION); - worker.registerWorkflowImplementationTypes(CompressedDataConverterWorkflow.WorkflowImpl.class); - worker.registerActivitiesImplementations(new CompressedDataConverterWorkflow.ActivitiesImpl()); - factory.start(); - return factory; - } - - private static WorkerFactory startEncryptionWorker(DataConverter converter) { - WorkflowClient client = DataConverterSupport.newWorkflowClient(converter); - WorkerFactory factory = WorkerFactory.newInstance(client); - Worker worker = factory.newWorker(DataConverterConstants.TASK_LIST_ENCRYPTION); - worker.registerWorkflowImplementationTypes(EncryptedDataConverterWorkflow.WorkflowImpl.class); - worker.registerActivitiesImplementations(new EncryptedDataConverterWorkflow.ActivitiesImpl()); - factory.start(); - return factory; - } - - private static WorkerFactory startS3OffloadWorker(DataConverter converter) { - WorkflowClient client = DataConverterSupport.newWorkflowClient(converter); - WorkerFactory factory = WorkerFactory.newInstance(client); - Worker worker = factory.newWorker(DataConverterConstants.TASK_LIST_S3); - worker.registerWorkflowImplementationTypes(S3OffloadDataConverterWorkflow.WorkflowImpl.class); - worker.registerActivitiesImplementations(new S3OffloadDataConverterWorkflow.ActivitiesImpl()); - factory.start(); - return factory; - } - - // ---------------- Stats banners ---------------- - - private static void printCompressionStats(DataConverter converter) { - CompressedDataConverterWorkflow.LargePayload payload = - 
CompressedDataConverterWorkflow.createLargePayload(); - byte[] originalJson = JsonDataConverter.getInstance().toData(payload); - byte[] compressed = converter.toData(payload); - int originalSize = originalJson == null ? 0 : originalJson.length; - int compressedSize = compressed == null ? 0 : compressed.length; - double pct = originalSize == 0 ? 0.0 : (1.0 - (double) compressedSize / originalSize) * 100.0; - - System.out.println(); - System.out.println("=== Compression Sample Statistics ==="); - System.out.printf( - "Original JSON size: %d bytes (%.2f KB)%n", originalSize, originalSize / 1024.0); - System.out.printf( - "Compressed size: %d bytes (%.2f KB)%n", compressedSize, compressedSize / 1024.0); - System.out.printf("Compression ratio: %.2f%% reduction%n", pct); - System.out.printf( - "Space saved: %d bytes (%.2f KB)%n", - originalSize - compressedSize, (originalSize - compressedSize) / 1024.0); - System.out.printf( - "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", - DataConverterConstants.DOMAIN, - DataConverterConstants.TASK_LIST_COMPRESSION, - DataConverterConstants.COMPRESSION_WORKFLOW_TYPE); - System.out.println("====================================="); - System.out.println(); - } - - private static void printEncryptionStats(DataConverter converter) { - EncryptedDataConverterWorkflow.SensitiveCustomerRecord record = - EncryptedDataConverterWorkflow.createSensitiveCustomerRecord(); - byte[] plaintext = JsonDataConverter.getInstance().toData(record); - byte[] ciphertext = converter.toData(record); - int plaintextSize = plaintext == null ? 0 : plaintext.length; - int ciphertextSize = ciphertext == null ? 0 : ciphertext.length; - String preview = ciphertext == null ? 
"" : hexPreview(ciphertext, 40); - - System.out.println(); - System.out.println("=== Encryption Sample Statistics ==="); - System.out.printf("Plaintext JSON size: %d bytes%n", plaintextSize); - System.out.printf( - "Encrypted payload: %d bytes (growth: %d bytes vs plaintext JSON)%n", - ciphertextSize, ciphertextSize - plaintextSize); - System.out.printf("Ciphertext preview: %s%n", preview); - System.out.printf( - "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", - DataConverterConstants.DOMAIN, - DataConverterConstants.TASK_LIST_ENCRYPTION, - DataConverterConstants.ENCRYPTION_WORKFLOW_TYPE); - System.out.println("===================================="); - System.out.println(); - } - - private static void printS3OffloadStats(LocalFsBlobStore store) { - S3OffloadDataConverterWorkflow.S3LargePayload payload = - S3OffloadDataConverterWorkflow.createS3LargePayload(); - byte[] jsonBytes = JsonDataConverter.getInstance().toData(payload); - int jsonSize = jsonBytes == null ? 0 : jsonBytes.length; - // History footprint = 1 prefix byte + JSON envelope {"s3Ref":"/"}. - // SHA-256 hex digest is 64 chars; bucket + "/" + 64 hex chars. - int cadenceBytes = - 1 - + ("{\"s3Ref\":\"" - + DataConverterConstants.S3_BUCKET - + "/" - + repeatChar('a', 64) - + "\"}") - .length(); - - System.out.println(); - System.out.println("=== S3 Offload Sample Statistics ==="); - System.out.printf( - "Full payload JSON size: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); - System.out.printf( - "Stored in BlobStore: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); - System.out.printf( - "Stored in Cadence history: %d bytes (claim-check reference only)%n", cadenceBytes); - System.out.printf( - "Reduction in Cadence: %.1f%%%n", - jsonSize == 0 ? 
0.0 : 100.0 * (1.0 - (double) cadenceBytes / jsonSize)); - System.out.printf("BlobStore location: %s%n", store.baseDir()); - System.out.printf( - "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", - DataConverterConstants.DOMAIN, - DataConverterConstants.TASK_LIST_S3, - DataConverterConstants.S3_OFFLOAD_WORKFLOW_TYPE); - System.out.println("====================================="); - System.out.println(); - } - - private static String hexPreview(byte[] data, int byteLimit) { - int len = Math.min(byteLimit, data.length); - StringBuilder sb = new StringBuilder(len * 2 + 3); - for (int i = 0; i < len; i++) { - sb.append(String.format("%02x", data[i] & 0xff)); - } - if (data.length > byteLimit) { - sb.append("..."); - } - return sb.toString(); - } - - private static String repeatChar(char c, int n) { - char[] buf = new char[n]; - java.util.Arrays.fill(buf, c); - return new String(buf); - } -} diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/README.md b/src/main/java/com/uber/cadence/samples/dataconverter/README.md deleted file mode 100644 index 8c9691ac..00000000 --- a/src/main/java/com/uber/cadence/samples/dataconverter/README.md +++ /dev/null @@ -1,170 +0,0 @@ -# DataConverter Samples - -Three practical patterns for custom `DataConverter` implementations in the Cadence Java client: **compression**, **encryption**, and **BlobStore / S3 claim-check offload**. A `DataConverter` controls how every workflow input, output, and activity parameter is serialized before it is written to Cadence history — making it the right place to add compression, encryption, or external offloading without changing any workflow or activity code. - -## What is a DataConverter? - -`com.uber.cadence.converter.DataConverter` defines three methods: - -- `byte[] toData(Object... values)` — called before data is written to Cadence history. 
-- `<T> T fromData(byte[] content, Class<T> valueClass, Type valueType)` — called for single-value payloads (workflow/activity results, internal payloads). -- `Object[] fromDataArray(byte[] content, Type... valueTypes)` — called to decode workflow/activity argument lists on the worker side. - -The same `DataConverter` must be used by **both the worker and any client that sends or receives non-trivial workflow data**. In these samples the workflows generate their payloads internally and take no inputs, so they can be started from the Cadence CLI without bundling a custom converter into the CLI itself. - -Each sample uses its own task list so it can have its own `DataConverter`. `DataConverterWorker` starts one worker per task list in a single process. - -## Prerequisites - -1. Cadence server running (e.g. Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). -2. From the repo root, build: `./gradlew build`. - -### Register the domain (required once per cluster) - -Starters use domain **`samples-domain`**. If you see `Domain samples-domain does not exist`, register it **before** starting workflows: - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain -``` - -Or with the Cadence CLI: - -```bash -cadence --domain samples-domain domain register -``` - -See also the root [README.md](../../../../../../../../README.md). - -## Run the worker (terminal 1) - -Leave this process running. It starts three workers — one per `DataConverter` — and prints a stats banner per sample: - -```bash -cd /path/to/cadence-java-samples -./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.DataConverterWorker -``` - -## Start a workflow (terminal 2) - -Run **one** of the starters per sample run. Each starts a new workflow execution and exits. 
- -**Compression** — gzip-over-JSON: - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.CompressionStarter -``` - -**Encryption** — AES-256-GCM: - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.EncryptionStarter -``` - -**S3 offload** — claim-check pattern with a zero-config local `BlobStore`: - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.S3OffloadStarter -``` - -You can also start any of the three from the Cadence CLI; the commands are printed in the worker's stats banner on startup. - ---- - -## Compression Sample - -`CompressedDataConverterWorkflow` demonstrates gzip-over-JSON compression. For repetitive JSON data this typically achieves 60–80% size reduction, lowering storage cost and bandwidth for large workflow payloads. The converter is implemented in [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) — it wraps `JsonDataConverter.getInstance()`, post-processes the resulting bytes through `java.util.zip.GZIP*Stream`, and caps decompressed output to avoid unbounded memory growth on malformed input. - -- **Task list:** `data-compression` -- **Workflow type:** `CompressedDataConverterWorkflow` - ---- - -## Encryption Sample - -`EncryptedDataConverterWorkflow` demonstrates AES-256-GCM encryption. Every workflow input, output, and activity parameter is encrypted before being written to Cadence history. Without the key, payloads stored by the Cadence server are unreadable to operators browsing workflow history. Logs, metrics, search attributes, and application output are separate disclosure surfaces. - -The sample uses a `SensitiveCustomerRecord` containing realistic PII and PHI fields (name, email, SSN, credit card, medical notes) to make the use case concrete. 
- -- **Task list:** `data-encryption` -- **Workflow type:** `EncryptedDataConverterWorkflow` - -### Encryption key - -By default, the worker uses a hardcoded demo key and prints a prominent warning. To use your own key: - -```bash -export CADENCE_ENCRYPTION_KEY=$(openssl rand -hex 32) -./gradlew -q execute -PmainClass=com.uber.cadence.samples.dataconverter.DataConverterWorker -``` - -> **WARNING:** The hardcoded demo key (`cadence-demo-key-NOT-FOR-PROD!!!`) is public. Never use it in production. In production, load your key from a secrets manager (AWS Secrets Manager, HashiCorp Vault, GCP Secret Manager, etc.). - -### How AES-256-GCM works - -- `toData`: JSON-encode arguments → generate a 12-byte random nonce → `Cipher.doFinal` with `AES/GCM/NoPadding` → return `nonce || ciphertext || tag`. -- `fromData` / `fromDataArray`: split nonce from input → `Cipher.doFinal` (decrypt) → JSON-decode. - -The GCM authentication tag (16 bytes) ensures any ciphertext tampering is detected. The random nonce means the same plaintext produces different ciphertext on every call, which preserves semantic security for repeated payloads. - ---- - -## S3 Offload Sample (claim-check pattern) - -`S3OffloadDataConverterWorkflow` demonstrates the *claim-check* pattern: payloads larger than a configurable threshold are stored in an external [`BlobStore`](BlobStore.java) and only a small reference (a few dozen bytes) travels through Cadence workflow history. The runnable sample uses [`LocalFsBlobStore`](LocalFsBlobStore.java) so it works without cloud credentials; the same abstraction can be backed by S3 in production. This solves Cadence's per-payload size limits (~2 MB) for workflows that pass very large datasets between the workflow and its activities. 
- -- **Task list:** `data-s3` -- **Workflow type:** `S3OffloadDataConverterWorkflow` - -### How it works - -- `toData`: JSON-encode → if `len(json) > thresholdBytes`, upload to `BlobStore` under a SHA-256 key and return `0x01 || {"s3Ref":"<bucket>/<sha256>"}`. Otherwise return `0x00 || json` inline. -- `fromData` / `fromDataArray`: read prefix byte → if `0x01`, fetch from `BlobStore` and decode; if `0x00`, decode inline. - -SHA-256-of-payload is used as the key so `toData` is idempotent across Cadence workflow replays. Using a fresh UUID per call would write a new orphaned blob on every replay. - -### Default store (zero-config) - -Out of the box, [`LocalFsBlobStore`](LocalFsBlobStore.java) writes blobs to `${java.io.tmpdir}/cadence-java-samples-data-s3/`. No cloud credentials or additional dependencies are needed. - -### Swapping in real AWS S3 - -The top of [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) contains a commented `S3BlobStore` skeleton showing the AWS SDK v2 calls needed. To enable it: - -1. Add AWS SDK v2 to `build.gradle`: - ```groovy - implementation group: 'software.amazon.awssdk', name: 's3', version: '2.25.0' - ``` -2. Implement `BlobStore` against `software.amazon.awssdk.services.s3.S3Client` (the commented stub shows the exact calls). -3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in `DataConverterWorker`. -4. Set standard AWS environment variables (`AWS_REGION`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`) or use an IAM instance role. - -You can also point the SDK at [LocalStack](https://localstack.cloud/) or [MinIO](https://min.io/) for local testing without a real AWS account. - -> **Note on cleanup:** `S3OffloadDataConverter` does not delete blobs after the workflow completes. In production, use S3 object lifecycle policies to automatically expire old blobs. 
- ---- - -## When to use which pattern - -| Pattern | Best for | -|---------|----------| -| **Compression** | Large repetitive JSON payloads; reducing storage cost without confidentiality requirements | -| **Encryption** | PII, PHI, secrets, or any data that must be unreadable in Cadence history | -| **BlobStore / S3 Offload** | Payloads approaching Cadence's size limits; binary or non-JSON data; cost-conscious archival | - -Patterns can be composed, but order matters. Compress before encrypting when size reduction is a goal; encrypt before offloading when the external store should only receive ciphertext. - -## Source layout - -| File | Purpose | -|------|---------| -| [`DataConverterConstants.java`](DataConverterConstants.java) | Task list and workflow type names plus the shared Cadence domain | -| [`DataConverterSupport.java`](DataConverterSupport.java) | Shared `WorkflowClient` factory + friendly "domain missing" hint | -| [`DataConverterWorker.java`](DataConverterWorker.java) | Hosts all three workers; prints stats banners on startup | -| [`CompressedJsonDataConverter.java`](CompressedJsonDataConverter.java) | gzip-over-JSON `DataConverter` | -| [`EncryptedJsonDataConverter.java`](EncryptedJsonDataConverter.java) | AES-256-GCM `DataConverter` | -| [`EncryptionKeyLoader.java`](EncryptionKeyLoader.java) | Reads `CADENCE_ENCRYPTION_KEY` with demo-key fallback | -| [`BlobStore.java`](BlobStore.java) / [`LocalFsBlobStore.java`](LocalFsBlobStore.java) | `BlobStore` abstraction + local-FS default | -| [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) | Claim-check `DataConverter` with commented AWS S3 stub | -| `*DataConverterWorkflow.java` | One workflow + activity per sample (each takes no inputs) | -| `*Starter.java` | Thin async starters mirroring the existing `query/` samples | diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java 
b/src/main/java/com/uber/cadence/samples/encryption/EncryptedDataConverterWorkflow.java similarity index 92% rename from src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java rename to src/main/java/com/uber/cadence/samples/encryption/EncryptedDataConverterWorkflow.java index e9502a80..70a3bc4e 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedDataConverterWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/encryption/EncryptedDataConverterWorkflow.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.encryption; import com.uber.cadence.activity.ActivityMethod; import com.uber.cadence.activity.ActivityOptions; @@ -36,6 +36,14 @@ public final class EncryptedDataConverterWorkflow { private EncryptedDataConverterWorkflow() {} + /** Task list polled by {@link EncryptionWorker}. */ + public static final String TASK_LIST = "data-encryption"; + + /** + * Registered workflow type, used for both {@code @WorkflowMethod} and CLI {@code workflow start}. + */ + public static final String WORKFLOW_TYPE = "EncryptedDataConverterWorkflow"; + // ---------------- POJOs ---------------- /** PII / PHI-style record that must be encrypted in workflow history. 
*/ @@ -79,9 +87,9 @@ public static SensitiveCustomerRecord createSensitiveCustomerRecord() { public interface WorkflowIface { @WorkflowMethod( - name = DataConverterConstants.ENCRYPTION_WORKFLOW_TYPE, + name = WORKFLOW_TYPE, executionStartToCloseTimeoutSeconds = 60, - taskList = DataConverterConstants.TASK_LIST_ENCRYPTION + taskList = TASK_LIST ) SensitiveCustomerRecord run(); } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverter.java similarity index 99% rename from src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java rename to src/main/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverter.java index 20c7d803..e0d53fc2 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptedJsonDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverter.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.encryption; import com.uber.cadence.converter.DataConverter; import com.uber.cadence.converter.DataConverterException; diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java b/src/main/java/com/uber/cadence/samples/encryption/EncryptionKeyLoader.java similarity index 98% rename from src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java rename to src/main/java/com/uber/cadence/samples/encryption/EncryptionKeyLoader.java index f82b695d..7b57a72d 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionKeyLoader.java +++ b/src/main/java/com/uber/cadence/samples/encryption/EncryptionKeyLoader.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. 
*/ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.encryption; import java.nio.charset.StandardCharsets; diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java b/src/main/java/com/uber/cadence/samples/encryption/EncryptionStarter.java similarity index 50% rename from src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java rename to src/main/java/com/uber/cadence/samples/encryption/EncryptionStarter.java index cac5bb9c..3f713034 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/EncryptionStarter.java +++ b/src/main/java/com/uber/cadence/samples/encryption/EncryptionStarter.java @@ -15,10 +15,14 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.encryption; import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; import com.uber.cadence.client.WorkflowOptions; +import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; +import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; +import com.uber.cadence.samples.common.SampleConstants; import java.time.Duration; import java.util.UUID; @@ -43,10 +47,13 @@ private EncryptionStarter() {} public static void main(String[] args) { try { - WorkflowClient client = DataConverterSupport.newWorkflowClient(); + WorkflowClient client = + WorkflowClient.newInstance( + new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), + WorkflowClientOptions.newBuilder().setDomain(SampleConstants.DOMAIN).build()); WorkflowOptions options = new WorkflowOptions.Builder() - .setTaskList(DataConverterConstants.TASK_LIST_ENCRYPTION) + .setTaskList(EncryptedDataConverterWorkflow.TASK_LIST) .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) .setWorkflowId("encryption-" + UUID.randomUUID()) .build(); @@ -56,15 +63,45 @@ public static void main(String[] args) { 
WorkflowClient.start(workflow::run); System.out.println( - "Started EncryptedDataConverterWorkflow on task list \"" - + DataConverterConstants.TASK_LIST_ENCRYPTION + "Started " + + EncryptedDataConverterWorkflow.WORKFLOW_TYPE + + " on task list \"" + + EncryptedDataConverterWorkflow.TASK_LIST + "\"."); System.exit(0); } catch (RuntimeException e) { - if (DataConverterSupport.printHintIfDomainMissing(e)) { + if (printHintIfDomainMissing(e)) { System.exit(1); } throw e; } } + + /** + * Prints a copy-paste hint when the Cadence error indicates the sample domain has not been + * registered. + * + * @return true if {@code t} was a missing-domain error and a hint was printed (caller should + * exit). + */ + static boolean printHintIfDomainMissing(Throwable t) { + for (Throwable c = t; c != null; c = c.getCause()) { + String m = c.getMessage(); + if (m != null && m.contains("Domain") && m.contains("does not exist")) { + System.err.println(); + System.err.println( + "Cadence reported that the domain \"" + SampleConstants.DOMAIN + "\" does not exist."); + System.err.println("Register it once against your cluster, then run this again:"); + System.err.println(); + System.err.println( + " ./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain"); + System.err.println(); + System.err.println("Or with Cadence CLI:"); + System.err.println(" cadence --domain " + SampleConstants.DOMAIN + " domain register"); + System.err.println(); + return true; + } + } + return false; + } } diff --git a/src/main/java/com/uber/cadence/samples/encryption/EncryptionWorker.java b/src/main/java/com/uber/cadence/samples/encryption/EncryptionWorker.java new file mode 100644 index 00000000..20086a4f --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/encryption/EncryptionWorker.java @@ -0,0 +1,106 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.encryption; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.JsonDataConverter; +import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; +import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; +import com.uber.cadence.samples.common.SampleConstants; +import com.uber.cadence.worker.Worker; +import com.uber.cadence.worker.WorkerFactory; + +/** + * Hosts the AES-256-GCM encryption sample worker. Constructs a {@link WorkflowClient} configured + * with {@link EncryptedJsonDataConverter} so every workflow input, output, and activity parameter + * is transparently encrypted before Cadence history sees it. The encryption key comes from {@link + * EncryptionKeyLoader} (env var {@code CADENCE_ENCRYPTION_KEY}, or a hardcoded demo key with a + * warning). 
+ */ +public final class EncryptionWorker { + + private EncryptionWorker() {} + + public static void main(String[] args) { + DataConverter converter = + new EncryptedJsonDataConverter(EncryptionKeyLoader.loadEncryptionKey()); + WorkflowClient client = + WorkflowClient.newInstance( + new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), + WorkflowClientOptions.newBuilder() + .setDomain(SampleConstants.DOMAIN) + .setDataConverter(converter) + .build()); + + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker(EncryptedDataConverterWorkflow.TASK_LIST); + worker.registerWorkflowImplementationTypes(EncryptedDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new EncryptedDataConverterWorkflow.ActivitiesImpl()); + factory.start(); + + printEncryptionStats(converter); + + System.out.println( + "EncryptionWorker listening on \"" + + EncryptedDataConverterWorkflow.TASK_LIST + + "\" (domain \"" + + SampleConstants.DOMAIN + + "\")."); + + Runtime.getRuntime().addShutdownHook(new Thread(factory::shutdown)); + } + + private static void printEncryptionStats(DataConverter converter) { + EncryptedDataConverterWorkflow.SensitiveCustomerRecord record = + EncryptedDataConverterWorkflow.createSensitiveCustomerRecord(); + byte[] plaintext = JsonDataConverter.getInstance().toData(record); + byte[] ciphertext = converter.toData(record); + int plaintextSize = plaintext == null ? 0 : plaintext.length; + int ciphertextSize = ciphertext == null ? 0 : ciphertext.length; + String preview = ciphertext == null ? 
"" : hexPreview(ciphertext, 40); + + System.out.println(); + System.out.println("=== Encryption Sample Statistics ==="); + System.out.printf("Plaintext JSON size: %d bytes%n", plaintextSize); + System.out.printf( + "Encrypted payload: %d bytes (growth: %d bytes vs plaintext JSON)%n", + ciphertextSize, ciphertextSize - plaintextSize); + System.out.printf("Ciphertext preview: %s%n", preview); + System.out.printf( + "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", + SampleConstants.DOMAIN, + EncryptedDataConverterWorkflow.TASK_LIST, + EncryptedDataConverterWorkflow.WORKFLOW_TYPE); + System.out.println("===================================="); + System.out.println(); + } + + private static String hexPreview(byte[] data, int byteLimit) { + int len = Math.min(byteLimit, data.length); + StringBuilder sb = new StringBuilder(len * 2 + 3); + for (int i = 0; i < len; i++) { + sb.append(String.format("%02x", data[i] & 0xff)); + } + if (data.length > byteLimit) { + sb.append("..."); + } + return sb.toString(); + } +} diff --git a/src/main/java/com/uber/cadence/samples/encryption/README.md b/src/main/java/com/uber/cadence/samples/encryption/README.md new file mode 100644 index 00000000..a23c8582 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/encryption/README.md @@ -0,0 +1,74 @@ +# Encryption DataConverter Sample + +A custom Cadence [`DataConverter`](../../../../../../../../README.md) that JSON-encodes workflow data and then encrypts it with AES-256-GCM. Every workflow input, output, and activity parameter is encrypted before being written to Cadence history. Without the key, payloads stored by the Cadence server are unreadable to operators browsing workflow history. + +Note that application logs, metrics, and search attributes are separate disclosure surfaces — a `DataConverter` does not protect them. Treat them accordingly. 
+ +- **Task list:** `data-encryption` +- **Workflow type:** `EncryptedDataConverterWorkflow` + +## Prerequisites + +1. Cadence server running (e.g. Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). +2. From the repo root, build: `./gradlew build`. + +### Register the domain (required once per cluster) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain +``` + +Or with the Cadence CLI: + +```bash +cadence --domain samples-domain domain register +``` + +### Encryption key + +The worker loads its AES-256 key from the `CADENCE_ENCRYPTION_KEY` environment variable (64 hex characters = 32 bytes). If the env var is unset, the worker falls back to a hardcoded demo key and prints a warning — **never use the demo key in production**. If the env var is set but invalid, the worker fails fast instead of silently using the demo key. + +Generate a key: + +```bash +export CADENCE_ENCRYPTION_KEY=$(openssl rand -hex 32) +``` + +## Run the worker (terminal 1) + +The worker prints an encryption statistics banner showing plaintext vs ciphertext size and a hex preview, then begins polling the `data-encryption` task list: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.encryption.EncryptionWorker +``` + +## Start a workflow (terminal 2) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.encryption.EncryptionStarter +``` + +Or from the Cadence CLI: + +```bash +cadence --domain samples-domain \ + workflow start \ + --workflow_type EncryptedDataConverterWorkflow \ + --tl data-encryption \ + --et 60 +``` + +## How it works + +- `toData`: JSON-encode the arguments with the standard `JsonDataConverter`, then encrypt with `AES/GCM/NoPadding` using a fresh 12-byte random nonce. The output layout is `nonce(12 bytes) || ciphertext || tag(16 bytes)`. A new nonce per call preserves semantic security for repeated payloads. 
+- `fromData` / `fromDataArray`: split nonce + ciphertext, run AES-GCM decrypt (which authenticates the tag and fails on any tampering), then delegate to `JsonDataConverter`. + +## Source layout + +| File | Purpose | +|------|---------| +| [`EncryptedJsonDataConverter.java`](EncryptedJsonDataConverter.java) | The custom `DataConverter` | +| [`EncryptionKeyLoader.java`](EncryptionKeyLoader.java) | Reads the 32-byte key from `CADENCE_ENCRYPTION_KEY` or the demo fallback | +| [`EncryptedDataConverterWorkflow.java`](EncryptedDataConverterWorkflow.java) | Workflow + activity + sample `SensitiveCustomerRecord` POJO and generator | +| [`EncryptionWorker.java`](EncryptionWorker.java) | Worker main; wires the converter into `WorkflowClientOptions` and prints the stats banner | +| [`EncryptionStarter.java`](EncryptionStarter.java) | Thin async starter | diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java b/src/main/java/com/uber/cadence/samples/s3offload/BlobStore.java similarity index 96% rename from src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java rename to src/main/java/com/uber/cadence/samples/s3offload/BlobStore.java index d4fdaa3a..f560b57e 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/BlobStore.java +++ b/src/main/java/com/uber/cadence/samples/s3offload/BlobStore.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. 
*/ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.s3offload; import java.io.IOException; diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java b/src/main/java/com/uber/cadence/samples/s3offload/LocalFsBlobStore.java similarity index 98% rename from src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java rename to src/main/java/com/uber/cadence/samples/s3offload/LocalFsBlobStore.java index b066e5ec..9e55a9a1 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/LocalFsBlobStore.java +++ b/src/main/java/com/uber/cadence/samples/s3offload/LocalFsBlobStore.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.s3offload; import java.io.IOException; import java.nio.charset.StandardCharsets; diff --git a/src/main/java/com/uber/cadence/samples/s3offload/README.md b/src/main/java/com/uber/cadence/samples/s3offload/README.md new file mode 100644 index 00000000..2290247d --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/s3offload/README.md @@ -0,0 +1,79 @@ +# S3 Offload (Claim-Check) DataConverter Sample + +A custom Cadence [`DataConverter`](../../../../../../../../README.md) that implements the **claim-check pattern**: payloads larger than a configurable threshold are stored in an external `BlobStore` (S3 / GCS / local disk) and only a small reference travels through Cadence workflow history. + +This solves Cadence's per-payload size limits (~2 MB) for workflows that pass very large datasets, and lowers history storage cost for long-running workflows that pass large repeatable data. 
+ +- **Task list:** `data-s3` +- **Workflow type:** `S3OffloadDataConverterWorkflow` +- **Default threshold:** 4 KB (deliberately low so the demo always offloads) +- **Default backing store:** [`LocalFsBlobStore`](LocalFsBlobStore.java) writing to `${java.io.tmpdir}/cadence-java-samples-data-s3/` + +## Prerequisites + +1. Cadence server running (e.g. Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). +2. From the repo root, build: `./gradlew build`. + +### Register the domain (required once per cluster) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain +``` + +Or with the Cadence CLI: + +```bash +cadence --domain samples-domain domain register +``` + +## Run the worker (terminal 1) + +The worker prints an S3-offload statistics banner showing how much was offloaded to the blob store vs how little ends up in Cadence history, then begins polling the `data-s3` task list: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadWorker +``` + +## Start a workflow (terminal 2) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadStarter +``` + +Or from the Cadence CLI: + +```bash +cadence --domain samples-domain \ + workflow start \ + --workflow_type S3OffloadDataConverterWorkflow \ + --tl data-s3 \ + --et 60 +``` + +## How it works + +- `toData`: JSON-encode the arguments with the standard `JsonDataConverter`. If the resulting bytes are at or below the threshold, write `0x00 || json` and return inline. Otherwise compute a SHA-256 of the bytes, `PUT` to the blob store under `<bucket>/<sha256-hex>`, and return `0x01 || json({"s3Ref":"<bucket>/<sha256-hex>"})`. Using the content hash as the key makes `toData` idempotent across Cadence workflow replays. +- `fromData` / `fromDataArray`: read the 1-byte prefix; inline payloads pass straight to `JsonDataConverter`, offloaded payloads first fetch the blob via `BlobStore.get`.
+- Cleanup: this sample does not delete blobs after the workflow completes. In production, use S3 object lifecycle policies to expire old blobs automatically. + +## Swapping `LocalFsBlobStore` for real S3 + +The header comment in [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) sketches an `S3BlobStore` implementation using AWS SDK v2: + +1. Add `software.amazon.awssdk:s3:2.25.0` to `build.gradle`. +2. Implement `BlobStore` against `software.amazon.awssdk.services.s3.S3Client`. +3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in [`S3OffloadWorker`](S3OffloadWorker.java). +4. Provide credentials via standard AWS env vars or an IAM instance role. + +Point the SDK at LocalStack or MinIO for local testing without a real AWS account. + +## Source layout + +| File | Purpose | +|------|---------| +| [`BlobStore.java`](BlobStore.java) | Two-method abstraction over any object store | +| [`LocalFsBlobStore.java`](LocalFsBlobStore.java) | Zero-config implementation writing to the temp dir | +| [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) | The custom `DataConverter`; also contains the S3 stub | +| [`S3OffloadDataConverterWorkflow.java`](S3OffloadDataConverterWorkflow.java) | Workflow + activity + sample `S3LargePayload` POJOs and generator | +| [`S3OffloadWorker.java`](S3OffloadWorker.java) | Worker main; wires the converter into `WorkflowClientOptions` and prints the stats banner | +| [`S3OffloadStarter.java`](S3OffloadStarter.java) | Thin async starter | diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverter.java similarity index 99% rename from src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java rename to src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverter.java index f14c67bb..b29e45a0 100644 --- 
a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverter.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.s3offload; import com.uber.cadence.converter.DataConverter; import com.uber.cadence.converter.DataConverterException; @@ -79,7 +79,7 @@ * } * * 3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in - * DataConverterWorker. + * S3OffloadWorker. * 4. Set standard AWS env vars (AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) or use an * IAM instance role. * diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterWorkflow.java similarity index 83% rename from src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java rename to src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterWorkflow.java index b1c8f2be..2d2456c0 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadDataConverterWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterWorkflow.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.s3offload; import com.uber.cadence.activity.ActivityMethod; import com.uber.cadence.activity.ActivityOptions; @@ -38,6 +38,24 @@ public final class S3OffloadDataConverterWorkflow { private S3OffloadDataConverterWorkflow() {} + /** Task list polled by {@link S3OffloadWorker}. */ + public static final String TASK_LIST = "data-s3"; + + /** + * Registered workflow type, used for both {@code @WorkflowMethod} and CLI {@code workflow start}. 
+ */ + public static final String WORKFLOW_TYPE = "S3OffloadDataConverterWorkflow"; + + /** Logical bucket / prefix embedded in S3-offload reference keys. */ + public static final String S3_BUCKET = "data-s3"; + + /** + * Payloads larger than this are offloaded to the BlobStore by {@link S3OffloadDataConverter}. + * Cadence's default max payload is roughly 2 MB; the threshold is set intentionally low so the + * demo workflow comfortably triggers offloading. + */ + public static final int DEFAULT_THRESHOLD_BYTES = 4096; + // ---------------- POJOs ---------------- public static final class S3LargePayload { @@ -60,8 +78,8 @@ public S3DataPoint() {} } /** - * Builds a payload comfortably larger than {@link - * DataConverterConstants#S3_DEFAULT_THRESHOLD_BYTES} so every workflow run triggers an offload. + * Builds a payload comfortably larger than {@link #DEFAULT_THRESHOLD_BYTES} so every workflow run + * triggers an offload. */ public static S3LargePayload createS3LargePayload() { S3LargePayload p = new S3LargePayload(); @@ -102,9 +120,9 @@ private static String repeat(String s, int n) { public interface WorkflowIface { @WorkflowMethod( - name = DataConverterConstants.S3_OFFLOAD_WORKFLOW_TYPE, + name = WORKFLOW_TYPE, executionStartToCloseTimeoutSeconds = 60, - taskList = DataConverterConstants.TASK_LIST_S3 + taskList = TASK_LIST ) S3LargePayload run(); } diff --git a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadStarter.java similarity index 50% rename from src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java rename to src/main/java/com/uber/cadence/samples/s3offload/S3OffloadStarter.java index e27da8a6..028e6096 100644 --- a/src/main/java/com/uber/cadence/samples/dataconverter/S3OffloadStarter.java +++ b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadStarter.java @@ -15,10 +15,14 @@ * permissions and limitations under the License. 
*/ -package com.uber.cadence.samples.dataconverter; +package com.uber.cadence.samples.s3offload; import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; import com.uber.cadence.client.WorkflowOptions; +import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; +import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; +import com.uber.cadence.samples.common.SampleConstants; import java.time.Duration; import java.util.UUID; @@ -43,10 +47,13 @@ private S3OffloadStarter() {} public static void main(String[] args) { try { - WorkflowClient client = DataConverterSupport.newWorkflowClient(); + WorkflowClient client = + WorkflowClient.newInstance( + new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), + WorkflowClientOptions.newBuilder().setDomain(SampleConstants.DOMAIN).build()); WorkflowOptions options = new WorkflowOptions.Builder() - .setTaskList(DataConverterConstants.TASK_LIST_S3) + .setTaskList(S3OffloadDataConverterWorkflow.TASK_LIST) .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) .setWorkflowId("s3-offload-" + UUID.randomUUID()) .build(); @@ -56,15 +63,45 @@ public static void main(String[] args) { WorkflowClient.start(workflow::run); System.out.println( - "Started S3OffloadDataConverterWorkflow on task list \"" - + DataConverterConstants.TASK_LIST_S3 + "Started " + + S3OffloadDataConverterWorkflow.WORKFLOW_TYPE + + " on task list \"" + + S3OffloadDataConverterWorkflow.TASK_LIST + "\"."); System.exit(0); } catch (RuntimeException e) { - if (DataConverterSupport.printHintIfDomainMissing(e)) { + if (printHintIfDomainMissing(e)) { System.exit(1); } throw e; } } + + /** + * Prints a copy-paste hint when the Cadence error indicates the sample domain has not been + * registered. + * + * @return true if {@code t} was a missing-domain error and a hint was printed (caller should + * exit). 
+ */ + static boolean printHintIfDomainMissing(Throwable t) { + for (Throwable c = t; c != null; c = c.getCause()) { + String m = c.getMessage(); + if (m != null && m.contains("Domain") && m.contains("does not exist")) { + System.err.println(); + System.err.println( + "Cadence reported that the domain \"" + SampleConstants.DOMAIN + "\" does not exist."); + System.err.println("Register it once against your cluster, then run this again:"); + System.err.println(); + System.err.println( + " ./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain"); + System.err.println(); + System.err.println("Or with Cadence CLI:"); + System.err.println(" cadence --domain " + SampleConstants.DOMAIN + " domain register"); + System.err.println(); + return true; + } + } + return false; + } } diff --git a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java new file mode 100644 index 00000000..5852e231 --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java @@ -0,0 +1,116 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.uber.cadence.samples.s3offload; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; +import com.uber.cadence.converter.DataConverter; +import com.uber.cadence.converter.JsonDataConverter; +import com.uber.cadence.internal.compatibility.Thrift2ProtoAdapter; +import com.uber.cadence.internal.compatibility.proto.serviceclient.IGrpcServiceStubs; +import com.uber.cadence.samples.common.SampleConstants; +import com.uber.cadence.worker.Worker; +import com.uber.cadence.worker.WorkerFactory; + +/** + * Hosts the S3 offload (claim-check) sample worker. Constructs a {@link WorkflowClient} configured + * with {@link S3OffloadDataConverter} backed by {@link LocalFsBlobStore} so payloads above the + * threshold are stored on disk and replaced in Cadence history with a small reference. Swap in a + * real S3-backed {@link BlobStore} (see comments in {@link S3OffloadDataConverter}) to move blobs + * to S3 without changing any workflow or activity code. 
+ */ +public final class S3OffloadWorker { + + private S3OffloadWorker() {} + + public static void main(String[] args) { + LocalFsBlobStore blobStore = new LocalFsBlobStore(); + DataConverter converter = + new S3OffloadDataConverter( + blobStore, + S3OffloadDataConverterWorkflow.S3_BUCKET, + S3OffloadDataConverterWorkflow.DEFAULT_THRESHOLD_BYTES); + WorkflowClient client = + WorkflowClient.newInstance( + new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), + WorkflowClientOptions.newBuilder() + .setDomain(SampleConstants.DOMAIN) + .setDataConverter(converter) + .build()); + + WorkerFactory factory = WorkerFactory.newInstance(client); + Worker worker = factory.newWorker(S3OffloadDataConverterWorkflow.TASK_LIST); + worker.registerWorkflowImplementationTypes(S3OffloadDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new S3OffloadDataConverterWorkflow.ActivitiesImpl()); + factory.start(); + + printS3OffloadStats(blobStore); + + System.out.println( + "S3OffloadWorker listening on \"" + + S3OffloadDataConverterWorkflow.TASK_LIST + + "\" (domain \"" + + SampleConstants.DOMAIN + + "\")."); + + Runtime.getRuntime().addShutdownHook(new Thread(factory::shutdown)); + } + + private static void printS3OffloadStats(LocalFsBlobStore store) { + S3OffloadDataConverterWorkflow.S3LargePayload payload = + S3OffloadDataConverterWorkflow.createS3LargePayload(); + byte[] jsonBytes = JsonDataConverter.getInstance().toData(payload); + int jsonSize = jsonBytes == null ? 0 : jsonBytes.length; + // History footprint = 1 prefix byte + JSON envelope {"s3Ref":"/"}. + // SHA-256 hex digest is 64 chars; bucket + "/" + 64 hex chars. 
+ int cadenceBytes = + 1 + + ("{\"s3Ref\":\"" + + S3OffloadDataConverterWorkflow.S3_BUCKET + + "/" + + repeatChar('a', 64) + + "\"}") + .length(); + + System.out.println(); + System.out.println("=== S3 Offload Sample Statistics ==="); + System.out.printf( + "Full payload JSON size: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); + System.out.printf( + "Stored in BlobStore: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); + System.out.printf( + "Stored in Cadence history: %d bytes (claim-check reference only)%n", cadenceBytes); + System.out.printf( + "Reduction in Cadence: %.1f%%%n", + jsonSize == 0 ? 0.0 : 100.0 * (1.0 - (double) cadenceBytes / jsonSize)); + System.out.printf("BlobStore location: %s%n", store.baseDir()); + System.out.printf( + "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", + SampleConstants.DOMAIN, + S3OffloadDataConverterWorkflow.TASK_LIST, + S3OffloadDataConverterWorkflow.WORKFLOW_TYPE); + System.out.println("====================================="); + System.out.println(); + } + + private static String repeatChar(char c, int n) { + char[] buf = new char[n]; + java.util.Arrays.fill(buf, c); + return new String(buf); + } +} diff --git a/src/test/java/com/uber/cadence/samples/compression/CompressedJsonDataConverterTest.java b/src/test/java/com/uber/cadence/samples/compression/CompressedJsonDataConverterTest.java new file mode 100644 index 00000000..fc2274d9 --- /dev/null +++ b/src/test/java/com/uber/cadence/samples/compression/CompressedJsonDataConverterTest.java @@ -0,0 +1,73 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. 
A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.compression; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.uber.cadence.converter.DataConverterException; +import org.junit.Test; + +public class CompressedJsonDataConverterTest { + + @Test + public void testCompressedConverterRoundTrip() { + CompressedJsonDataConverter converter = new CompressedJsonDataConverter(); + CompressedDataConverterWorkflow.LargePayload payload = + CompressedDataConverterWorkflow.createLargePayload(); + + byte[] encoded = converter.toData(payload); + CompressedDataConverterWorkflow.LargePayload decoded = + converter.fromData( + encoded, + CompressedDataConverterWorkflow.LargePayload.class, + CompressedDataConverterWorkflow.LargePayload.class); + + assertEquals(payload.id, decoded.id); + assertEquals(payload.name, decoded.name); + assertEquals(payload.items.size(), decoded.items.size()); + assertEquals(payload.history.size(), decoded.history.size()); + } + + @Test + public void testCompressedConverterRejectsMalformedPayload() { + CompressedJsonDataConverter converter = new CompressedJsonDataConverter(); + + try { + converter.fromData(new byte[] {1, 2, 3}, String.class, String.class); + fail("expected malformed gzip payload to fail"); + } catch (DataConverterException e) { + assertTrue(e.getMessage().contains("gunzip")); + } + } + + @Test + public void testCompressedConverterRejectsPayloadAboveLimit() { + CompressedJsonDataConverter encoder = new CompressedJsonDataConverter(); + CompressedJsonDataConverter decoder = new 
CompressedJsonDataConverter(8); + byte[] encoded = encoder.toData("this string inflates beyond the configured limit"); + + try { + decoder.fromData(encoded, String.class, String.class); + fail("expected oversized decompressed payload to fail"); + } catch (DataConverterException e) { + assertTrue(e.getMessage().contains("maximum size")); + } + } +} diff --git a/src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java b/src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java deleted file mode 100644 index 77685913..00000000 --- a/src/test/java/com/uber/cadence/samples/dataconverter/DataConverterSamplesTest.java +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not - * use this file except in compliance with the License. A copy of the License is - * located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -package com.uber.cadence.samples.dataconverter; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import com.uber.cadence.client.WorkflowClient; -import com.uber.cadence.client.WorkflowClientOptions; -import com.uber.cadence.client.WorkflowOptions; -import com.uber.cadence.converter.DataConverterException; -import com.uber.cadence.testing.TestEnvironmentOptions; -import com.uber.cadence.testing.TestWorkflowEnvironment; -import com.uber.cadence.worker.Worker; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.time.Duration; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.stream.Stream; -import org.junit.After; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class DataConverterSamplesTest { - - @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - private TestWorkflowEnvironment testEnv; - - @After - public void tearDown() { - if (testEnv != null) { - testEnv.close(); - } - } - - @Test - public void testCompressedConverterRoundTrip() { - CompressedJsonDataConverter converter = new CompressedJsonDataConverter(); - CompressedDataConverterWorkflow.LargePayload payload = - CompressedDataConverterWorkflow.createLargePayload(); - - byte[] encoded = converter.toData(payload); - CompressedDataConverterWorkflow.LargePayload decoded = - converter.fromData( - encoded, - CompressedDataConverterWorkflow.LargePayload.class, - CompressedDataConverterWorkflow.LargePayload.class); - - assertEquals(payload.id, decoded.id); - assertEquals(payload.name, decoded.name); - assertEquals(payload.items.size(), decoded.items.size()); - assertEquals(payload.history.size(), decoded.history.size()); - } - - @Test - public void 
testCompressedConverterRejectsMalformedPayload() { - CompressedJsonDataConverter converter = new CompressedJsonDataConverter(); - - try { - converter.fromData(new byte[] {1, 2, 3}, String.class, String.class); - fail("expected malformed gzip payload to fail"); - } catch (DataConverterException e) { - assertTrue(e.getMessage().contains("gunzip")); - } - } - - @Test - public void testCompressedConverterRejectsPayloadAboveLimit() { - CompressedJsonDataConverter encoder = new CompressedJsonDataConverter(); - CompressedJsonDataConverter decoder = new CompressedJsonDataConverter(8); - byte[] encoded = encoder.toData("this string inflates beyond the configured limit"); - - try { - decoder.fromData(encoded, String.class, String.class); - fail("expected oversized decompressed payload to fail"); - } catch (DataConverterException e) { - assertTrue(e.getMessage().contains("maximum size")); - } - } - - @Test - public void testEncryptedConverterRoundTripAndRandomNonce() { - EncryptedJsonDataConverter converter = - new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY); - EncryptedDataConverterWorkflow.SensitiveCustomerRecord record = - EncryptedDataConverterWorkflow.createSensitiveCustomerRecord(); - - byte[] first = converter.toData(record); - byte[] second = converter.toData(record); - - assertFalse(Arrays.equals(first, second)); - EncryptedDataConverterWorkflow.SensitiveCustomerRecord decoded = - converter.fromData( - first, - EncryptedDataConverterWorkflow.SensitiveCustomerRecord.class, - EncryptedDataConverterWorkflow.SensitiveCustomerRecord.class); - assertEquals(record.customerId, decoded.customerId); - assertEquals(record.ssn, decoded.ssn); - assertEquals(record.medicalNotes, decoded.medicalNotes); - } - - @Test - public void testEncryptedConverterRejectsShortCiphertext() { - EncryptedJsonDataConverter converter = - new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY); - - try { - converter.fromData(new byte[] {1, 2, 3}, String.class, 
String.class); - fail("expected short ciphertext to fail"); - } catch (DataConverterException e) { - assertTrue(e.getMessage().contains("Ciphertext too short")); - } - } - - @Test - public void testEncryptedConverterWorksInWorkflowEnvironment() { - EncryptedJsonDataConverter converter = - new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY); - TestEnvironmentOptions options = - new TestEnvironmentOptions.Builder() - .setWorkflowClientOptions( - WorkflowClientOptions.newBuilder().setDataConverter(converter).build()) - .build(); - testEnv = TestWorkflowEnvironment.newInstance(options); - Worker worker = testEnv.newWorker(DataConverterConstants.TASK_LIST_ENCRYPTION); - worker.registerWorkflowImplementationTypes(EncryptedDataConverterWorkflow.WorkflowImpl.class); - worker.registerActivitiesImplementations(new EncryptedDataConverterWorkflow.ActivitiesImpl()); - testEnv.start(); - - WorkflowClient workflowClient = - testEnv.newWorkflowClient( - WorkflowClientOptions.newBuilder().setDataConverter(converter).build()); - WorkflowOptions workflowOptions = - new WorkflowOptions.Builder() - .setTaskList(DataConverterConstants.TASK_LIST_ENCRYPTION) - .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) - .build(); - EncryptedDataConverterWorkflow.WorkflowIface workflow = - workflowClient.newWorkflowStub( - EncryptedDataConverterWorkflow.WorkflowIface.class, workflowOptions); - - EncryptedDataConverterWorkflow.SensitiveCustomerRecord result = workflow.run(); - - assertEquals("cust_8a7f3b2e", result.customerId); - assertEquals("workflow-processor-v2 (Encrypted)", result.processedBy); - } - - @Test - public void testS3OffloadConverterInlinesBelowThreshold() { - RecordingBlobStore store = new RecordingBlobStore(); - S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1024); - - byte[] encoded = converter.toData("small"); - String decoded = converter.fromData(encoded, String.class, String.class); - - 
assertEquals(S3OffloadDataConverter.INLINE_PREFIX, encoded[0]); - assertEquals("small", decoded); - assertTrue(store.blobs.isEmpty()); - } - - @Test - public void testS3OffloadConverterOffloadsAndUsesIdempotentReference() { - RecordingBlobStore store = new RecordingBlobStore(); - S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1); - - byte[] first = converter.toData("large enough to offload"); - byte[] second = converter.toData("large enough to offload"); - String decoded = converter.fromData(first, String.class, String.class); - - assertEquals(S3OffloadDataConverter.OFFLOAD_PREFIX, first[0]); - assertArrayEquals(first, second); - assertEquals("large enough to offload", decoded); - assertEquals(1, store.blobs.size()); - } - - @Test - public void testS3OffloadConverterRejectsUnknownPrefix() { - S3OffloadDataConverter converter = - new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", 1); - - try { - converter.fromData(new byte[] {0x7f}, String.class, String.class); - fail("expected unknown prefix to fail"); - } catch (DataConverterException e) { - assertTrue(e.getMessage().contains("unknown prefix")); - } - } - - @Test - public void testS3OffloadConverterValidatesConstructorInputs() { - expectIllegalArgument(() -> new S3OffloadDataConverter(null, "bucket", 1)); - expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), " ", 1)); - expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", -1)); - } - - @Test - public void testLocalFsBlobStoreHashesUnsafeKeys() throws Exception { - Path baseDir = temporaryFolder.newFolder("blobs").toPath(); - LocalFsBlobStore store = new LocalFsBlobStore(baseDir); - byte[] data = new byte[] {1, 2, 3}; - - store.put("../escape", data); - store.put(".", data); - store.put("bucket\\nested/key", data); - - assertArrayEquals(data, store.get("../escape")); - assertArrayEquals(data, store.get(".")); - assertArrayEquals(data, 
store.get("bucket\\nested/key")); - try (Stream files = Files.list(baseDir)) { - assertEquals(3, files.filter(Files::isRegularFile).count()); - } - try (Stream files = Files.list(baseDir)) { - assertTrue(files.allMatch(path -> path.getFileName().toString().matches("[0-9a-f]{64}"))); - } - } - - private static void expectIllegalArgument(Runnable runnable) { - try { - runnable.run(); - fail("expected IllegalArgumentException"); - } catch (IllegalArgumentException expected) { - // Expected. - } - } - - private static final class RecordingBlobStore implements BlobStore { - final Map blobs = new LinkedHashMap<>(); - - @Override - public void put(String key, byte[] data) { - blobs.put(key, data); - } - - @Override - public byte[] get(String key) throws IOException { - byte[] data = blobs.get(key); - if (data == null) { - throw new IOException("missing key " + key); - } - return data; - } - } -} diff --git a/src/test/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverterTest.java b/src/test/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverterTest.java new file mode 100644 index 00000000..e3659787 --- /dev/null +++ b/src/test/java/com/uber/cadence/samples/encryption/EncryptedJsonDataConverterTest.java @@ -0,0 +1,114 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.uber.cadence.samples.encryption; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.uber.cadence.client.WorkflowClient; +import com.uber.cadence.client.WorkflowClientOptions; +import com.uber.cadence.client.WorkflowOptions; +import com.uber.cadence.converter.DataConverterException; +import com.uber.cadence.testing.TestEnvironmentOptions; +import com.uber.cadence.testing.TestWorkflowEnvironment; +import com.uber.cadence.worker.Worker; +import java.time.Duration; +import java.util.Arrays; +import org.junit.After; +import org.junit.Test; + +public class EncryptedJsonDataConverterTest { + + private TestWorkflowEnvironment testEnv; + + @After + public void tearDown() { + if (testEnv != null) { + testEnv.close(); + } + } + + @Test + public void testEncryptedConverterRoundTripAndRandomNonce() { + EncryptedJsonDataConverter converter = + new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY); + EncryptedDataConverterWorkflow.SensitiveCustomerRecord record = + EncryptedDataConverterWorkflow.createSensitiveCustomerRecord(); + + byte[] first = converter.toData(record); + byte[] second = converter.toData(record); + + assertFalse(Arrays.equals(first, second)); + EncryptedDataConverterWorkflow.SensitiveCustomerRecord decoded = + converter.fromData( + first, + EncryptedDataConverterWorkflow.SensitiveCustomerRecord.class, + EncryptedDataConverterWorkflow.SensitiveCustomerRecord.class); + assertEquals(record.customerId, decoded.customerId); + assertEquals(record.ssn, decoded.ssn); + assertEquals(record.medicalNotes, decoded.medicalNotes); + } + + @Test + public void testEncryptedConverterRejectsShortCiphertext() { + EncryptedJsonDataConverter converter = + new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY); + + try { + converter.fromData(new byte[] {1, 2, 3}, String.class, String.class); + 
fail("expected short ciphertext to fail"); + } catch (DataConverterException e) { + assertTrue(e.getMessage().contains("Ciphertext too short")); + } + } + + @Test + public void testEncryptedConverterWorksInWorkflowEnvironment() { + EncryptedJsonDataConverter converter = + new EncryptedJsonDataConverter(EncryptionKeyLoader.DEMO_ENCRYPTION_KEY); + TestEnvironmentOptions options = + new TestEnvironmentOptions.Builder() + .setWorkflowClientOptions( + WorkflowClientOptions.newBuilder().setDataConverter(converter).build()) + .build(); + testEnv = TestWorkflowEnvironment.newInstance(options); + Worker worker = testEnv.newWorker(EncryptedDataConverterWorkflow.TASK_LIST); + worker.registerWorkflowImplementationTypes(EncryptedDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new EncryptedDataConverterWorkflow.ActivitiesImpl()); + testEnv.start(); + + WorkflowClient workflowClient = + testEnv.newWorkflowClient( + WorkflowClientOptions.newBuilder().setDataConverter(converter).build()); + WorkflowOptions workflowOptions = + new WorkflowOptions.Builder() + .setTaskList(EncryptedDataConverterWorkflow.TASK_LIST) + .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) + .build(); + EncryptedDataConverterWorkflow.WorkflowIface workflow = + workflowClient.newWorkflowStub( + EncryptedDataConverterWorkflow.WorkflowIface.class, workflowOptions); + + EncryptedDataConverterWorkflow.SensitiveCustomerRecord result = workflow.run(); + + assertEquals("cust_8a7f3b2e", result.customerId); + assertEquals("workflow-processor-v2 (Encrypted)", result.processedBy); + } +} diff --git a/src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java b/src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java new file mode 100644 index 00000000..f8e9d310 --- /dev/null +++ b/src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java @@ -0,0 +1,135 @@ +/* + * Copyright 2012-2016 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License is + * located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.uber.cadence.samples.s3offload; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.uber.cadence.converter.DataConverterException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.stream.Stream; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class S3OffloadDataConverterTest { + + @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Test + public void testS3OffloadConverterInlinesBelowThreshold() { + RecordingBlobStore store = new RecordingBlobStore(); + S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1024); + + byte[] encoded = converter.toData("small"); + String decoded = converter.fromData(encoded, String.class, String.class); + + assertEquals(S3OffloadDataConverter.INLINE_PREFIX, encoded[0]); + assertEquals("small", decoded); + assertTrue(store.blobs.isEmpty()); + } + + @Test + public void testS3OffloadConverterOffloadsAndUsesIdempotentReference() { + RecordingBlobStore store = new RecordingBlobStore(); + S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1); + 
+ byte[] first = converter.toData("large enough to offload"); + byte[] second = converter.toData("large enough to offload"); + String decoded = converter.fromData(first, String.class, String.class); + + assertEquals(S3OffloadDataConverter.OFFLOAD_PREFIX, first[0]); + assertArrayEquals(first, second); + assertEquals("large enough to offload", decoded); + assertEquals(1, store.blobs.size()); + } + + @Test + public void testS3OffloadConverterRejectsUnknownPrefix() { + S3OffloadDataConverter converter = + new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", 1); + + try { + converter.fromData(new byte[] {0x7f}, String.class, String.class); + fail("expected unknown prefix to fail"); + } catch (DataConverterException e) { + assertTrue(e.getMessage().contains("unknown prefix")); + } + } + + @Test + public void testS3OffloadConverterValidatesConstructorInputs() { + expectIllegalArgument(() -> new S3OffloadDataConverter(null, "bucket", 1)); + expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), " ", 1)); + expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", -1)); + } + + @Test + public void testLocalFsBlobStoreHashesUnsafeKeys() throws Exception { + Path baseDir = temporaryFolder.newFolder("blobs").toPath(); + LocalFsBlobStore store = new LocalFsBlobStore(baseDir); + byte[] data = new byte[] {1, 2, 3}; + + store.put("../escape", data); + store.put(".", data); + store.put("bucket\\nested/key", data); + + assertArrayEquals(data, store.get("../escape")); + assertArrayEquals(data, store.get(".")); + assertArrayEquals(data, store.get("bucket\\nested/key")); + try (Stream files = Files.list(baseDir)) { + assertEquals(3, files.filter(Files::isRegularFile).count()); + } + try (Stream files = Files.list(baseDir)) { + assertTrue(files.allMatch(path -> path.getFileName().toString().matches("[0-9a-f]{64}"))); + } + } + + private static void expectIllegalArgument(Runnable runnable) { + try { + runnable.run(); + 
fail("expected IllegalArgumentException"); + } catch (IllegalArgumentException expected) { + // Expected. + } + } + + private static final class RecordingBlobStore implements BlobStore { + final Map blobs = new LinkedHashMap<>(); + + @Override + public void put(String key, byte[] data) { + blobs.put(key, data); + } + + @Override + public byte[] get(String key) throws IOException { + byte[] data = blobs.get(key); + if (data == null) { + throw new IOException("missing key " + key); + } + return data; + } + } +} From 9583361d4c3e64164b9882b4c1f7d75d8b99a1ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Fri, 15 May 2026 11:31:40 -0700 Subject: [PATCH 08/11] fix: undo nested try blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- .../samples/compression/CompressedJsonDataConverter.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java index 4c734253..6a42203f 100644 --- a/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java @@ -65,15 +65,14 @@ public byte[] toData(Object... 
values) throws DataConverterException { if (jsonBytes == null || jsonBytes.length == 0) { return jsonBytes; } + + ByteArrayOutputStream out = new ByteArrayOutputStream(); try { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (GZIPOutputStream gzip = new GZIPOutputStream(out)) { - gzip.write(jsonBytes); - } - return out.toByteArray(); + gzip.write(jsonBytes); } catch (IOException e) { throw new DataConverterException("Failed to gzip-compress JSON payload", e); } + return out.toByteArray(); } @Override From 390455aa7884c5c2314425ef0998f24b7dc717e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Fri, 15 May 2026 12:45:00 -0700 Subject: [PATCH 09/11] fix: missing import statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- .../samples/compression/CompressedJsonDataConverter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java b/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java index 6a42203f..601b2643 100644 --- a/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/compression/CompressedJsonDataConverter.java @@ -65,14 +65,14 @@ public byte[] toData(Object... 
values) throws DataConverterException { if (jsonBytes == null || jsonBytes.length == 0) { return jsonBytes; } - + ByteArrayOutputStream out = new ByteArrayOutputStream(); - try { + try (GZIPOutputStream gzip = new GZIPOutputStream(out)) { gzip.write(jsonBytes); } catch (IOException e) { throw new DataConverterException("Failed to gzip-compress JSON payload", e); } - return out.toByteArray(); + return out.toByteArray(); } @Override From 230dd3f3383e9c44fedc4fc4e00613a20e7cf656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin=E2=80=9D?= Date: Fri, 15 May 2026 12:54:35 -0700 Subject: [PATCH 10/11] chore(query): white space changes made by googleJavaFormat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- .../samples/query/LunchVoteWorkflow.java | 21 ++++--- .../samples/query/MarkdownQueryWorkflow.java | 15 +++-- .../samples/query/OrderFulfillmentModels.java | 7 +-- .../query/OrderFulfillmentWorkflow.java | 63 ++++++++----------- .../samples/query/QuerySampleSupport.java | 3 +- .../cadence/samples/query/QueryWorker.java | 14 +++-- 6 files changed, 60 insertions(+), 63 deletions(-) diff --git a/src/main/java/com/uber/cadence/samples/query/LunchVoteWorkflow.java b/src/main/java/com/uber/cadence/samples/query/LunchVoteWorkflow.java index 24f3f124..80baa54a 100644 --- a/src/main/java/com/uber/cadence/samples/query/LunchVoteWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/query/LunchVoteWorkflow.java @@ -39,8 +39,8 @@ private LunchVoteWorkflow() {} /** * Signal payload for a lunch vote. Public fields are required so Cadence's JSON data converter - * can deserialize the signal input. Field names must match the JSON keys in the Markdoc - * {@code input=} attribute (e.g. {@code input={"location":"Farmhouse","meal":"Red Thai Curry"}}). + * can deserialize the signal input. Field names must match the JSON keys in the Markdoc {@code + * input=} attribute (e.g. 
{@code input={"location":"Farmhouse","meal":"Red Thai Curry"}}). */ public static class LunchOrder { public String location; @@ -64,9 +64,10 @@ public LunchOrder(String location, String meal, String requests) { public interface WorkflowIface { @WorkflowMethod( - name = QueryConstants.LUNCH_VOTE_WORKFLOW_TYPE, - executionStartToCloseTimeoutSeconds = 700, - taskList = TASK_LIST) + name = QueryConstants.LUNCH_VOTE_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 700, + taskList = TASK_LIST + ) void run(); /** Visible as "options" in the Cadence Web Query dropdown. */ @@ -74,8 +75,8 @@ public interface WorkflowIface { MarkdownFormattedResponse optionsQuery(); /** - * {@code name} sets the signal type string the worker listens for. It must match the - * {@code signalName} attribute in the Markdoc template so Cadence Web sends the right signal. + * {@code name} sets the signal type string the worker listens for. It must match the {@code + * signalName} attribute in the Markdoc template so Cadence Web sends the right signal. */ @SignalMethod(name = "lunch_order") void lunchOrder(LunchOrder vote); @@ -136,8 +137,10 @@ public MarkdownFormattedResponse optionsQuery() { return new MarkdownFormattedResponse(data); } - /** Builds a Markdoc {@code {%- signal -%}} tag. Every attribute is required for Cadence Web - * to route the signal to the correct workflow execution. */ + /** + * Builds a Markdoc {@code {%- signal -%}} tag. Every attribute is required for Cadence Web to + * route the signal to the correct workflow execution. 
+ */ private static String signalBlock( String workflowId, String runId, String label, String jsonInput) { return "{% signal \n" diff --git a/src/main/java/com/uber/cadence/samples/query/MarkdownQueryWorkflow.java b/src/main/java/com/uber/cadence/samples/query/MarkdownQueryWorkflow.java index 31268e12..439c2e4e 100644 --- a/src/main/java/com/uber/cadence/samples/query/MarkdownQueryWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/query/MarkdownQueryWorkflow.java @@ -43,17 +43,18 @@ private MarkdownQueryWorkflow() {} * *

<ul> *
<li>{@code @WorkflowMethod} — the entry point; loops waiting for signals. - *
<li>{@code @QueryMethod} — returns a {@link MarkdownFormattedResponse} so Cadence Web - * renders interactive markdown instead of raw JSON. + *
<li>{@code @QueryMethod} — returns a {@link MarkdownFormattedResponse} so Cadence Web renders + * interactive markdown instead of raw JSON. *
<li>{@code @SignalMethod} — receives external input (from Markdoc buttons or the CLI). * </ul>
*/ public interface WorkflowIface { @WorkflowMethod( - name = QueryConstants.MARKDOWN_QUERY_WORKFLOW_TYPE, - executionStartToCloseTimeoutSeconds = 3600, - taskList = TASK_LIST) + name = QueryConstants.MARKDOWN_QUERY_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 3600, + taskList = TASK_LIST + ) void run(); /** @@ -96,7 +97,9 @@ public static final class WorkflowImpl implements WorkflowIface { private String cachedWorkflowId = ""; private String cachedRunId = ""; - /** Set by {@link #refreshSuggestedStartWorkflowId()} in {@code run()} before any query executes. */ + /** + * Set by {@link #refreshSuggestedStartWorkflowId()} in {@code run()} before any query executes. + */ private String suggestedNewWorkflowId = ""; @Override diff --git a/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentModels.java b/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentModels.java index b0a6f1f0..3dbc4cfb 100644 --- a/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentModels.java +++ b/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentModels.java @@ -50,8 +50,7 @@ public static class Order { public String customerEmail = "alice.johnson@example.com"; public OrderItem[] items = new OrderItem[] { - new OrderItem("Wireless Headphones", 2, 79.99), - new OrderItem("Phone Case", 1, 19.99), + new OrderItem("Wireless Headphones", 2, 79.99), new OrderItem("Phone Case", 1, 19.99), }; public double totalAmount = 179.97; public String status = STATUS_PENDING_PAYMENT; @@ -86,8 +85,8 @@ public static class ActionLogEntry { /** * Signal POJOs below use public fields so the Cadence JSON data converter can deserialize them. * Field names must match the JSON keys in each Markdoc {@code input=} attribute; for example - * {@code input={"operator":"admin","reason":"Fraud"}} maps to {@link #operator} and - * {@link #reason}. + * {@code input={"operator":"admin","reason":"Fraud"}} maps to {@link #operator} and {@link + * #reason}. 
*/ public static class RejectPaymentSignal { public String reason; diff --git a/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentWorkflow.java b/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentWorkflow.java index 9620aca1..18d95ce8 100644 --- a/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/query/OrderFulfillmentWorkflow.java @@ -51,16 +51,17 @@ private OrderFulfillmentWorkflow() {} /** * Dashboard pattern: one query method renders the full markdown UI (tables, status, action - * buttons), and multiple signal methods drive state transitions on the order. The {@code name} - * on each {@code @SignalMethod} must match the {@code signalName} in the Markdoc template; - * without {@code name}, the Java SDK would default to {@code WorkflowIface::methodName}. + * buttons), and multiple signal methods drive state transitions on the order. The {@code name} on + * each {@code @SignalMethod} must match the {@code signalName} in the Markdoc template; without + * {@code name}, the Java SDK would default to {@code WorkflowIface::methodName}. */ public interface WorkflowIface { @WorkflowMethod( - name = QueryConstants.ORDER_FULFILLMENT_WORKFLOW_TYPE, - executionStartToCloseTimeoutSeconds = 3600, - taskList = TASK_LIST) + name = QueryConstants.ORDER_FULFILLMENT_WORKFLOW_TYPE, + executionStartToCloseTimeoutSeconds = 3600, + taskList = TASK_LIST + ) void run(); /** Visible as "dashboard" in the Cadence Web Query dropdown. */ @@ -101,9 +102,8 @@ public static final class WorkflowImpl implements WorkflowIface { /** * Inbox for signal-to-main-loop communication. Signal handlers (which execute on the workflow - * thread but outside the main loop) enqueue messages here. The main {@link #run()} loop - * drains the inbox one message at a time, keeping state transitions sequential and - * deterministic. + * thread but outside the main loop) enqueue messages here. 
The main {@link #run()} loop drains + * the inbox one message at a time, keeping state transitions sequential and deterministic. */ private final ArrayDeque inbox = new ArrayDeque<>(); @@ -136,8 +136,7 @@ public WorkflowImpl() { created.action = "Order Created"; created.operator = "System"; created.details = - String.format( - Locale.US, "Order %s created for %s", order.orderID, order.customerName); + String.format(Locale.US, "Order %s created for %s", order.orderID, order.customerName); actionLog.add(created); } @@ -214,15 +213,11 @@ private void handleShip(OrderFulfillmentModels.ShipOrderSignal signal) { "Order Shipped", getOperator(signal.operator), String.format( - Locale.US, - "Carrier: %s, Tracking: %s", - signal.carrier, - signal.trackingNumber))); + Locale.US, "Carrier: %s, Tracking: %s", signal.carrier, signal.trackingNumber))); } private void handleRefund(OrderFulfillmentModels.RefundSignal signal) { - if (!STATUS_PAYMENT_APPROVED.equals(order.status) - && !STATUS_SHIPPED.equals(order.status)) { + if (!STATUS_PAYMENT_APPROVED.equals(order.status) && !STATUS_SHIPPED.equals(order.status)) { return; } order.status = STATUS_REFUNDED; @@ -232,8 +227,7 @@ private void handleRefund(OrderFulfillmentModels.RefundSignal signal) { entry( "Refund Issued", getOperator(signal.operator), - String.format( - Locale.US, "Amount: $%.2f, Reason: %s", signal.amount, signal.reason))); + String.format(Locale.US, "Amount: $%.2f, Reason: %s", signal.amount, signal.reason))); } private void handleCancel(OrderFulfillmentModels.CancelOrderSignal signal) { @@ -260,7 +254,8 @@ private void handleMarkDelivered(OrderFulfillmentModels.SimpleSignal signal) { "Package confirmed delivered to customer")); } - private OrderFulfillmentModels.ActionLogEntry entry(String action, String operator, String details) { + private OrderFulfillmentModels.ActionLogEntry entry( + String action, String operator, String details) { OrderFulfillmentModels.ActionLogEntry e = new 
OrderFulfillmentModels.ActionLogEntry(); e.timestampMillis = Workflow.currentTimeMillis(); e.action = action; @@ -320,19 +315,12 @@ private String makeOrderDashboard() { String trackingRow = ""; if (order.trackingNum != null && !order.trackingNum.isEmpty()) { - trackingRow = - "\n| **Tracking** | " - + order.carrier - + " - " - + order.trackingNum - + " |"; + trackingRow = "\n| **Tracking** | " + order.carrier + " - " + order.trackingNum + " |"; } String refundRow = ""; if (order.refundAmount > 0) { refundRow = - "\n| **Refund** | $" - + String.format(Locale.US, "%.2f", order.refundAmount) - + " |"; + "\n| **Refund** | $" + String.format(Locale.US, "%.2f", order.refundAmount) + " |"; } return "\n## 🛒 Order Dashboard\n\n" @@ -420,7 +408,10 @@ private static String makeItemsTable(OrderFulfillmentModels.Order order) { String.format( Locale.US, "| %s | %d | $%.2f | $%.2f |\n", - item.name, item.quantity, item.price, subtotal)); + item.name, + item.quantity, + item.price, + subtotal)); } return table.toString(); } @@ -445,8 +436,8 @@ private static String makeActionHistory(List Date: Mon, 18 May 2026 09:39:46 -0700 Subject: [PATCH 11/11] refactor(claimcheck): rename s3offload sample to claimcheck MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: “Kevin” --- README.md | 10 +-- .../{s3offload => claimcheck}/BlobStore.java | 6 +- .../ClaimCheckDataConverter.java} | 59 ++++++++------ .../ClaimCheckDataConverterWorkflow.java} | 56 ++++++------- .../ClaimCheckStarter.java} | 28 +++---- .../ClaimCheckWorker.java} | 52 ++++++------ .../LocalFsBlobStore.java | 10 +-- .../uber/cadence/samples/claimcheck/README.md | 81 +++++++++++++++++++ .../uber/cadence/samples/s3offload/README.md | 79 ------------------ .../ClaimCheckDataConverterTest.java} | 31 +++---- 10 files changed, 211 insertions(+), 201 deletions(-) rename src/main/java/com/uber/cadence/samples/{s3offload => claimcheck}/BlobStore.java (87%) rename 
src/main/java/com/uber/cadence/samples/{s3offload/S3OffloadDataConverter.java => claimcheck/ClaimCheckDataConverter.java} (76%) rename src/main/java/com/uber/cadence/samples/{s3offload/S3OffloadDataConverterWorkflow.java => claimcheck/ClaimCheckDataConverterWorkflow.java} (72%) rename src/main/java/com/uber/cadence/samples/{s3offload/S3OffloadStarter.java => claimcheck/ClaimCheckStarter.java} (79%) rename src/main/java/com/uber/cadence/samples/{s3offload/S3OffloadWorker.java => claimcheck/ClaimCheckWorker.java} (65%) rename src/main/java/com/uber/cadence/samples/{s3offload => claimcheck}/LocalFsBlobStore.java (90%) create mode 100644 src/main/java/com/uber/cadence/samples/claimcheck/README.md delete mode 100644 src/main/java/com/uber/cadence/samples/s3offload/README.md rename src/test/java/com/uber/cadence/samples/{s3offload/S3OffloadDataConverterTest.java => claimcheck/ClaimCheckDataConverterTest.java} (77%) diff --git a/README.md b/README.md index 28eb3d01..1d1d2bc0 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ These samples demonstrate various capabilities of Java Cadence client and server * **DataConverter Samples** — three independent custom `DataConverter` patterns that transparently transform every workflow input, output, and activity parameter. Each lives in its own package and is fully standalone, so you can copy any one of them into your own project: * **Compression** ([`com.uber.cadence.samples.compression`](src/main/java/com/uber/cadence/samples/compression/)) — gzip-over-JSON; typically 60-80% size reduction for repetitive payloads. [README](src/main/java/com/uber/cadence/samples/compression/README.md). * **Encryption** ([`com.uber.cadence.samples.encryption`](src/main/java/com/uber/cadence/samples/encryption/)) — AES-256-GCM so payloads in Cadence history are unreadable without the key. [README](src/main/java/com/uber/cadence/samples/encryption/README.md). 
- * **S3 / claim-check offload** ([`com.uber.cadence.samples.s3offload`](src/main/java/com/uber/cadence/samples/s3offload/)) — payloads above a threshold are stored in an external `BlobStore`; only a small reference travels through history. [README](src/main/java/com/uber/cadence/samples/s3offload/README.md). + * **Claim-check offload** ([`com.uber.cadence.samples.claimcheck`](src/main/java/com/uber/cadence/samples/claimcheck/)) — payloads above a threshold are stored in an external `BlobStore` (S3, GCS, Azure Blob, MinIO, local disk); only a small reference travels through history. [README](src/main/java/com/uber/cadence/samples/claimcheck/README.md). ## Get the Samples @@ -162,12 +162,12 @@ See [src/main/java/com/uber/cadence/samples/encryption/README.md](src/main/java/ ./gradlew -q execute -PmainClass=com.uber.cadence.samples.encryption.EncryptionWorker ./gradlew -q execute -PmainClass=com.uber.cadence.samples.encryption.EncryptionStarter -#### S3 / claim-check offload +#### Claim-check offload -See [src/main/java/com/uber/cadence/samples/s3offload/README.md](src/main/java/com/uber/cadence/samples/s3offload/README.md) for the AWS SDK swap-in instructions. +See [src/main/java/com/uber/cadence/samples/claimcheck/README.md](src/main/java/com/uber/cadence/samples/claimcheck/README.md) for swap-in instructions for S3, GCS, Azure Blob, and MinIO. 
- ./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadWorker - ./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadStarter + ./gradlew -q execute -PmainClass=com.uber.cadence.samples.claimcheck.ClaimCheckWorker + ./gradlew -q execute -PmainClass=com.uber.cadence.samples.claimcheck.ClaimCheckStarter ### Trip Booking diff --git a/src/main/java/com/uber/cadence/samples/s3offload/BlobStore.java b/src/main/java/com/uber/cadence/samples/claimcheck/BlobStore.java similarity index 87% rename from src/main/java/com/uber/cadence/samples/s3offload/BlobStore.java rename to src/main/java/com/uber/cadence/samples/claimcheck/BlobStore.java index f560b57e..4c6e90fa 100644 --- a/src/main/java/com/uber/cadence/samples/s3offload/BlobStore.java +++ b/src/main/java/com/uber/cadence/samples/claimcheck/BlobStore.java @@ -15,14 +15,14 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import java.io.IOException; /** - * Abstraction over any external object store (local filesystem, S3, GCS, etc.). + * Abstraction over any external object store (local filesystem, S3, GCS, Azure Blob, etc.). * - *

{@link S3OffloadDataConverter} uses this interface to store large payloads outside Cadence + *

{@link ClaimCheckDataConverter} uses this interface to store large payloads outside Cadence * history. The default implementation is {@link LocalFsBlobStore}, which writes to the system * temporary directory and requires no external services. */ diff --git a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverter.java b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverter.java similarity index 76% rename from src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverter.java rename to src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverter.java index b29e45a0..3256bffc 100644 --- a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverter.java +++ b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverter.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import com.uber.cadence.converter.DataConverter; import com.uber.cadence.converter.DataConverterException; @@ -39,7 +39,7 @@ *

    *
  • {@code 0x00 || json} — payload is small enough to inline. *
  • {@code 0x01 || jsonEnvelope} — payload was offloaded; the envelope JSON has the form {@code - * {"s3Ref":"<bucket>/<sha256-hex>"}}. + * {"blobRef":"<bucket>/<sha256-hex>"}}. *
* *

Keys are derived from the SHA-256 of the payload so {@code toData} is idempotent across @@ -50,12 +50,19 @@ */ /* * ============================================================================= - * S3 BlobStore stub + * Swapping LocalFsBlobStore for a real object store * - * To use a real AWS S3 bucket instead of the local filesystem: - * 1. Add AWS SDK v2 to build.gradle: - * implementation group: 'software.amazon.awssdk', name: 's3', version: '2.25.0' - * 2. Implement BlobStore against software.amazon.awssdk.services.s3.S3Client: + * The DataConverter is storage-agnostic: any class that implements `BlobStore` (two methods, `put` + * and `get`) will work. Swap `new LocalFsBlobStore()` in ClaimCheckWorker for your own impl and the + * workflow/activity code stays the same. Backend pointers: + * + * - AWS S3: software.amazon.awssdk:s3:2.25.0 (S3Client + PutObjectRequest/GetObjectRequest) + * - GCS: com.google.cloud:google-cloud-storage (Storage.create(blobInfo, bytes)) + * - Azure Blob: com.azure:azure-storage-blob (BlobContainerClient.getBlobClient(...)) + * - MinIO / R2 / + * LocalStack: same as S3, just call S3Client.builder().endpointOverride(URI.create("...")) + * + * Reference S3 sketch using AWS SDK v2: * * public final class S3BlobStore implements BlobStore { * private final S3Client s3; @@ -78,18 +85,18 @@ * } * } * - * 3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in - * S3OffloadWorker. - * 4. Set standard AWS env vars (AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) or use an - * IAM instance role. - * - * You can also point the SDK at LocalStack or MinIO for local testing without a real AWS account. + * Wiring steps for any backend: + * 1. Add the backend's SDK as a runtime dependency in build.gradle. + * 2. Implement BlobStore against that SDK (≈30 lines, like the sketch above). + * 3. Replace `new LocalFsBlobStore()` with your `BlobStore` impl in ClaimCheckWorker. + * 4. 
Provide credentials via the SDK's standard mechanism (env vars, IAM role, etc.). * * Note on cleanup: this DataConverter does not delete blobs after the workflow completes. In - * production, use S3 object lifecycle policies to automatically expire old blobs. + * production, use the object store's lifecycle policies (S3 object lifecycle, GCS object lifecycle + * management, Azure Blob lifecycle management, etc.) to automatically expire old blobs. * ============================================================================= */ -public final class S3OffloadDataConverter implements DataConverter { +public final class ClaimCheckDataConverter implements DataConverter { /** Prefix byte for inline (below-threshold) payloads. */ static final byte INLINE_PREFIX = (byte) 0x00; @@ -104,12 +111,12 @@ public final class S3OffloadDataConverter implements DataConverter { private final int thresholdBytes; static final class BlobReference { - public String s3Ref; + public String blobRef; public BlobReference() {} - BlobReference(String s3Ref) { - this.s3Ref = s3Ref; + BlobReference(String blobRef) { + this.blobRef = blobRef; } } @@ -118,7 +125,7 @@ public BlobReference() {} * @param bucket logical bucket / prefix name embedded in the reference key. * @param thresholdBytes max inline payload size; larger payloads are offloaded. 
*/ - public S3OffloadDataConverter(BlobStore store, String bucket, int thresholdBytes) { + public ClaimCheckDataConverter(BlobStore store, String bucket, int thresholdBytes) { if (store == null) { throw new IllegalArgumentException("store must not be null"); } @@ -190,26 +197,26 @@ private byte[] unwrap(byte[] content) throws DataConverterException { case INLINE_PREFIX: return body; case OFFLOAD_PREFIX: - String key = extractS3Ref(body); + String key = extractBlobRef(body); try { return store.get(key); } catch (IOException e) { throw new DataConverterException( - "s3 offload: failed to fetch payload from blob store (key=" + key + ")", e); + "claimcheck: failed to fetch payload from blob store (key=" + key + ")", e); } default: throw new DataConverterException( - "s3 offload: unknown prefix byte 0x" + String.format("%02x", prefix & 0xff), null); + "claimcheck: unknown prefix byte 0x" + String.format("%02x", prefix & 0xff), null); } } - private static String extractS3Ref(byte[] envelopeJson) throws DataConverterException { + private static String extractBlobRef(byte[] envelopeJson) throws DataConverterException { BlobReference reference = delegate.fromData(envelopeJson, BlobReference.class, BlobReference.class); - if (reference == null || reference.s3Ref == null || reference.s3Ref.isEmpty()) { - throw new DataConverterException("s3 offload: envelope missing s3Ref field", null); + if (reference == null || reference.blobRef == null || reference.blobRef.isEmpty()) { + throw new DataConverterException("claimcheck: envelope missing blobRef field", null); } - return reference.s3Ref; + return reference.blobRef; } private static String sha256Hex(byte[] data) throws DataConverterException { diff --git a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterWorkflow.java b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverterWorkflow.java similarity index 72% rename from 
src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterWorkflow.java rename to src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverterWorkflow.java index 2d2456c0..4ed8b13a 100644 --- a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterWorkflow.java +++ b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverterWorkflow.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import com.uber.cadence.activity.ActivityMethod; import com.uber.cadence.activity.ActivityOptions; @@ -34,23 +34,23 @@ *

The workflow takes no inputs and builds a payload well above the threshold internally so it * can be started from the Cadence CLI and every run exercises the offload path. */ -public final class S3OffloadDataConverterWorkflow { +public final class ClaimCheckDataConverterWorkflow { - private S3OffloadDataConverterWorkflow() {} + private ClaimCheckDataConverterWorkflow() {} - /** Task list polled by {@link S3OffloadWorker}. */ - public static final String TASK_LIST = "data-s3"; + /** Task list polled by {@link ClaimCheckWorker}. */ + public static final String TASK_LIST = "data-claimcheck"; /** * Registered workflow type, used for both {@code @WorkflowMethod} and CLI {@code workflow start}. */ - public static final String WORKFLOW_TYPE = "S3OffloadDataConverterWorkflow"; + public static final String WORKFLOW_TYPE = "ClaimCheckDataConverterWorkflow"; - /** Logical bucket / prefix embedded in S3-offload reference keys. */ - public static final String S3_BUCKET = "data-s3"; + /** Logical bucket / prefix embedded in claim-check reference keys. */ + public static final String BLOB_BUCKET = "claimcheck-blobs"; /** - * Payloads larger than this are offloaded to the BlobStore by {@link S3OffloadDataConverter}. + * Payloads larger than this are offloaded to the BlobStore by {@link ClaimCheckDataConverter}. * Cadence's default max payload is roughly 2 MB; the threshold is set intentionally low so the * demo workflow comfortably triggers offloading. 
*/ @@ -58,31 +58,31 @@ private S3OffloadDataConverterWorkflow() {} // ---------------- POJOs ---------------- - public static final class S3LargePayload { + public static final class LargePayload { public String jobId; public String description; - public List dataPoints; + public List dataPoints; public Map metadata; public String processedBy; - public S3LargePayload() {} + public LargePayload() {} } - public static final class S3DataPoint { + public static final class DataPoint { public String timestamp; public String metric; public double value; public String tags; - public S3DataPoint() {} + public DataPoint() {} } /** * Builds a payload comfortably larger than {@link #DEFAULT_THRESHOLD_BYTES} so every workflow run * triggers an offload. */ - public static S3LargePayload createS3LargePayload() { - S3LargePayload p = new S3LargePayload(); + public static LargePayload createLargePayload() { + LargePayload p = new LargePayload(); p.jobId = "batch-job-20240115-001"; p.description = repeat( @@ -91,7 +91,7 @@ public static S3LargePayload createS3LargePayload() { p.dataPoints = new ArrayList<>(200); for (int i = 0; i < 200; i++) { - S3DataPoint dp = new S3DataPoint(); + DataPoint dp = new DataPoint(); dp.timestamp = String.format("2024-01-15T%02d:30:00Z", i % 24); dp.metric = String.format("telemetry.sensor_%03d.temperature", i); dp.value = 20.0 + (i % 30) / 10.0; @@ -103,7 +103,7 @@ public static S3LargePayload createS3LargePayload() { for (int i = 0; i < 20; i++) { p.metadata.put(String.format("batch_key_%02d", i), repeat("value-data-", 5)); } - p.processedBy = "s3-offload-worker-v1"; + p.processedBy = "claimcheck-worker-v1"; return p; } @@ -124,13 +124,13 @@ public interface WorkflowIface { executionStartToCloseTimeoutSeconds = 60, taskList = TASK_LIST ) - S3LargePayload run(); + LargePayload run(); } public interface Activities { @ActivityMethod(scheduleToCloseTimeoutSeconds = 60) - S3LargePayload processS3Payload(S3LargePayload payload); + LargePayload 
processPayload(LargePayload payload); } public static final class WorkflowImpl implements WorkflowIface { @@ -144,20 +144,20 @@ public static final class WorkflowImpl implements WorkflowIface { .build()); @Override - public S3LargePayload run() { - S3LargePayload payload = createS3LargePayload(); + public LargePayload run() { + LargePayload payload = createLargePayload(); - Workflow.getLogger(S3OffloadDataConverterWorkflow.class) + Workflow.getLogger(ClaimCheckDataConverterWorkflow.class) .info( - "S3 offload workflow started: job_id={}, data_points={}. Payload will be offloaded; only a reference travels through Cadence history.", + "Claim-check workflow started: job_id={}, data_points={}. Payload will be offloaded; only a reference travels through Cadence history.", payload.jobId, payload.dataPoints.size()); - S3LargePayload result = activities.processS3Payload(payload); + LargePayload result = activities.processPayload(payload); - Workflow.getLogger(S3OffloadDataConverterWorkflow.class) + Workflow.getLogger(ClaimCheckDataConverterWorkflow.class) .info( - "S3 offload workflow completed: job_id={}. Payload was transparently offloaded and retrieved via the BlobStore.", + "Claim-check workflow completed: job_id={}. 
Payload was transparently offloaded and retrieved via the BlobStore.", result.jobId); return result; } @@ -166,7 +166,7 @@ public S3LargePayload run() { public static final class ActivitiesImpl implements Activities { @Override - public S3LargePayload processS3Payload(S3LargePayload payload) { + public LargePayload processPayload(LargePayload payload) { payload.processedBy = payload.processedBy + " (Processed)"; return payload; } diff --git a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadStarter.java b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckStarter.java similarity index 79% rename from src/main/java/com/uber/cadence/samples/s3offload/S3OffloadStarter.java rename to src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckStarter.java index 028e6096..3a1d1e4f 100644 --- a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadStarter.java +++ b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckStarter.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import com.uber.cadence.client.WorkflowClient; import com.uber.cadence.client.WorkflowClientOptions; @@ -27,23 +27,23 @@ import java.util.UUID; /** - * Starts {@link S3OffloadDataConverterWorkflow} (async, fire-and-forget). + * Starts {@link ClaimCheckDataConverterWorkflow} (async, fire-and-forget). * *

The workflow takes no inputs and generates its own payload, so this starter does not need to - * use the matching {@link S3OffloadDataConverter}. The same effect can be achieved from the Cadence - * CLI via: + * use the matching {@link ClaimCheckDataConverter}. The same effect can be achieved from the + * Cadence CLI via: * *

  * cadence --domain samples-domain \
  *   workflow start \
- *   --workflow_type S3OffloadDataConverterWorkflow \
- *   --tl data-s3 \
+ *   --workflow_type ClaimCheckDataConverterWorkflow \
+ *   --tl data-claimcheck \
  *   --et 60
  * 
*/ -public final class S3OffloadStarter { +public final class ClaimCheckStarter { - private S3OffloadStarter() {} + private ClaimCheckStarter() {} public static void main(String[] args) { try { @@ -53,20 +53,20 @@ public static void main(String[] args) { WorkflowClientOptions.newBuilder().setDomain(SampleConstants.DOMAIN).build()); WorkflowOptions options = new WorkflowOptions.Builder() - .setTaskList(S3OffloadDataConverterWorkflow.TASK_LIST) + .setTaskList(ClaimCheckDataConverterWorkflow.TASK_LIST) .setExecutionStartToCloseTimeout(Duration.ofMinutes(1)) - .setWorkflowId("s3-offload-" + UUID.randomUUID()) + .setWorkflowId("claimcheck-" + UUID.randomUUID()) .build(); - S3OffloadDataConverterWorkflow.WorkflowIface workflow = - client.newWorkflowStub(S3OffloadDataConverterWorkflow.WorkflowIface.class, options); + ClaimCheckDataConverterWorkflow.WorkflowIface workflow = + client.newWorkflowStub(ClaimCheckDataConverterWorkflow.WorkflowIface.class, options); WorkflowClient.start(workflow::run); System.out.println( "Started " - + S3OffloadDataConverterWorkflow.WORKFLOW_TYPE + + ClaimCheckDataConverterWorkflow.WORKFLOW_TYPE + " on task list \"" - + S3OffloadDataConverterWorkflow.TASK_LIST + + ClaimCheckDataConverterWorkflow.TASK_LIST + "\"."); System.exit(0); } catch (RuntimeException e) { diff --git a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckWorker.java similarity index 65% rename from src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java rename to src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckWorker.java index 5852e231..7df275f6 100644 --- a/src/main/java/com/uber/cadence/samples/s3offload/S3OffloadWorker.java +++ b/src/main/java/com/uber/cadence/samples/claimcheck/ClaimCheckWorker.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. 
*/ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import com.uber.cadence.client.WorkflowClient; import com.uber.cadence.client.WorkflowClientOptions; @@ -28,23 +28,23 @@ import com.uber.cadence.worker.WorkerFactory; /** - * Hosts the S3 offload (claim-check) sample worker. Constructs a {@link WorkflowClient} configured - * with {@link S3OffloadDataConverter} backed by {@link LocalFsBlobStore} so payloads above the - * threshold are stored on disk and replaced in Cadence history with a small reference. Swap in a - * real S3-backed {@link BlobStore} (see comments in {@link S3OffloadDataConverter}) to move blobs - * to S3 without changing any workflow or activity code. + * Hosts the claim-check sample worker. Constructs a {@link WorkflowClient} configured with {@link + * ClaimCheckDataConverter} backed by {@link LocalFsBlobStore} so payloads above the threshold are + * stored on disk and replaced in Cadence history with a small reference. Swap in any real {@link + * BlobStore} (S3, GCS, Azure Blob, MinIO — see comments in {@link ClaimCheckDataConverter}) to move + * blobs to a remote object store without changing any workflow or activity code. 
*/ -public final class S3OffloadWorker { +public final class ClaimCheckWorker { - private S3OffloadWorker() {} + private ClaimCheckWorker() {} public static void main(String[] args) { LocalFsBlobStore blobStore = new LocalFsBlobStore(); DataConverter converter = - new S3OffloadDataConverter( + new ClaimCheckDataConverter( blobStore, - S3OffloadDataConverterWorkflow.S3_BUCKET, - S3OffloadDataConverterWorkflow.DEFAULT_THRESHOLD_BYTES); + ClaimCheckDataConverterWorkflow.BLOB_BUCKET, + ClaimCheckDataConverterWorkflow.DEFAULT_THRESHOLD_BYTES); WorkflowClient client = WorkflowClient.newInstance( new Thrift2ProtoAdapter(IGrpcServiceStubs.newInstance()), @@ -54,16 +54,16 @@ public static void main(String[] args) { .build()); WorkerFactory factory = WorkerFactory.newInstance(client); - Worker worker = factory.newWorker(S3OffloadDataConverterWorkflow.TASK_LIST); - worker.registerWorkflowImplementationTypes(S3OffloadDataConverterWorkflow.WorkflowImpl.class); - worker.registerActivitiesImplementations(new S3OffloadDataConverterWorkflow.ActivitiesImpl()); + Worker worker = factory.newWorker(ClaimCheckDataConverterWorkflow.TASK_LIST); + worker.registerWorkflowImplementationTypes(ClaimCheckDataConverterWorkflow.WorkflowImpl.class); + worker.registerActivitiesImplementations(new ClaimCheckDataConverterWorkflow.ActivitiesImpl()); factory.start(); - printS3OffloadStats(blobStore); + printClaimCheckStats(blobStore); System.out.println( - "S3OffloadWorker listening on \"" - + S3OffloadDataConverterWorkflow.TASK_LIST + "ClaimCheckWorker listening on \"" + + ClaimCheckDataConverterWorkflow.TASK_LIST + "\" (domain \"" + SampleConstants.DOMAIN + "\")."); @@ -71,24 +71,24 @@ public static void main(String[] args) { Runtime.getRuntime().addShutdownHook(new Thread(factory::shutdown)); } - private static void printS3OffloadStats(LocalFsBlobStore store) { - S3OffloadDataConverterWorkflow.S3LargePayload payload = - S3OffloadDataConverterWorkflow.createS3LargePayload(); + private static void 
printClaimCheckStats(LocalFsBlobStore store) { + ClaimCheckDataConverterWorkflow.LargePayload payload = + ClaimCheckDataConverterWorkflow.createLargePayload(); byte[] jsonBytes = JsonDataConverter.getInstance().toData(payload); int jsonSize = jsonBytes == null ? 0 : jsonBytes.length; - // History footprint = 1 prefix byte + JSON envelope {"s3Ref":"/"}. + // History footprint = 1 prefix byte + JSON envelope {"blobRef":"/"}. // SHA-256 hex digest is 64 chars; bucket + "/" + 64 hex chars. int cadenceBytes = 1 - + ("{\"s3Ref\":\"" - + S3OffloadDataConverterWorkflow.S3_BUCKET + + ("{\"blobRef\":\"" + + ClaimCheckDataConverterWorkflow.BLOB_BUCKET + "/" + repeatChar('a', 64) + "\"}") .length(); System.out.println(); - System.out.println("=== S3 Offload Sample Statistics ==="); + System.out.println("=== Claim-Check Sample Statistics ==="); System.out.printf( "Full payload JSON size: %d bytes (%.2f KB)%n", jsonSize, jsonSize / 1024.0); System.out.printf( @@ -102,8 +102,8 @@ private static void printS3OffloadStats(LocalFsBlobStore store) { System.out.printf( "Start workflow: cadence --domain %s workflow start --tl %s --workflow_type %s --et 60%n", SampleConstants.DOMAIN, - S3OffloadDataConverterWorkflow.TASK_LIST, - S3OffloadDataConverterWorkflow.WORKFLOW_TYPE); + ClaimCheckDataConverterWorkflow.TASK_LIST, + ClaimCheckDataConverterWorkflow.WORKFLOW_TYPE); System.out.println("====================================="); System.out.println(); } diff --git a/src/main/java/com/uber/cadence/samples/s3offload/LocalFsBlobStore.java b/src/main/java/com/uber/cadence/samples/claimcheck/LocalFsBlobStore.java similarity index 90% rename from src/main/java/com/uber/cadence/samples/s3offload/LocalFsBlobStore.java rename to src/main/java/com/uber/cadence/samples/claimcheck/LocalFsBlobStore.java index 9e55a9a1..b9394e77 100644 --- a/src/main/java/com/uber/cadence/samples/s3offload/LocalFsBlobStore.java +++ b/src/main/java/com/uber/cadence/samples/claimcheck/LocalFsBlobStore.java @@ -15,7 +15,7 
@@ * permissions and limitations under the License. */ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -28,16 +28,16 @@ /** * {@link BlobStore} implementation backed by the local filesystem. * - *

The default zero-config implementation used by {@link S3OffloadDataConverter} when running the - * demo without real AWS. Files are written under {@code - * ${java.io.tmpdir}/cadence-java-samples-data-s3/}. + *

The default zero-config implementation used by {@link ClaimCheckDataConverter} when running + * the demo without a real object store. Files are written under {@code + * ${java.io.tmpdir}/cadence-java-samples-claimcheck/}. */ public final class LocalFsBlobStore implements BlobStore { private final Path baseDir; public LocalFsBlobStore() { - this(Paths.get(System.getProperty("java.io.tmpdir"), "cadence-java-samples-data-s3")); + this(Paths.get(System.getProperty("java.io.tmpdir"), "cadence-java-samples-claimcheck")); } public LocalFsBlobStore(Path baseDir) { diff --git a/src/main/java/com/uber/cadence/samples/claimcheck/README.md b/src/main/java/com/uber/cadence/samples/claimcheck/README.md new file mode 100644 index 00000000..ae46e9de --- /dev/null +++ b/src/main/java/com/uber/cadence/samples/claimcheck/README.md @@ -0,0 +1,81 @@ +# Claim-Check DataConverter Sample + +A custom Cadence [`DataConverter`](../../../../../../../../README.md) that implements the **[claim-check pattern](https://www.enterpriseintegrationpatterns.com/patterns/messaging/StoreInLibrary.html)**: payloads larger than a configurable threshold are stored in an external `BlobStore` (S3, GCS, Azure Blob, MinIO, local disk, etc.) and only a small reference travels through Cadence workflow history. + +This solves Cadence's per-payload size limits (~2 MB) for workflows that pass very large datasets, and lowers history storage cost for long-running workflows that pass large repeatable data. + +- **Task list:** `data-claimcheck` +- **Workflow type:** `ClaimCheckDataConverterWorkflow` +- **Default threshold:** 4 KB (deliberately low so the demo always offloads) +- **Default backing store:** [`LocalFsBlobStore`](LocalFsBlobStore.java) writing to `${java.io.tmpdir}/cadence-java-samples-claimcheck/` + +## Prerequisites + +1. Cadence server running (e.g. Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). +2. From the repo root, build: `./gradlew build`. 
+ +### Register the domain (required once per cluster) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain +``` + +Or with the Cadence CLI: + +```bash +cadence --domain samples-domain domain register +``` + +## Run the worker (terminal 1) + +The worker prints a claim-check statistics banner showing how much was offloaded to the blob store vs how little ends up in Cadence history, then begins polling the `data-claimcheck` task list: + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.claimcheck.ClaimCheckWorker +``` + +## Start a workflow (terminal 2) + +```bash +./gradlew -q execute -PmainClass=com.uber.cadence.samples.claimcheck.ClaimCheckStarter +``` + +Or from the Cadence CLI: + +```bash +cadence --domain samples-domain \ + workflow start \ + --workflow_type ClaimCheckDataConverterWorkflow \ + --tl data-claimcheck \ + --et 60 +``` + +## How it works + +- `toData`: JSON-encode the arguments with the standard `JsonDataConverter`. If the resulting bytes are at or below the threshold, write `0x00 || json` and return inline. Otherwise compute a SHA-256 of the bytes, `PUT` to the blob store under `<bucket>/<sha256-hex>`, and return `0x01 || json({"blobRef":"<bucket>/<sha256-hex>"})`. Using the content hash as the key makes `toData` idempotent across Cadence workflow replays. +- `fromData` / `fromDataArray`: read the 1-byte prefix; inline payloads pass straight to `JsonDataConverter`, offloaded payloads first fetch the blob via `BlobStore.get`. +- Cleanup: this sample does not delete blobs after the workflow completes. In production, use the backing object store's lifecycle policies (S3 / GCS / Azure Blob lifecycle management) to expire old blobs automatically. + +> Note on the wire format: the `blobRef` field name is persisted in Cadence workflow history. In a real deployment, treat the envelope JSON as a versioned wire format — renaming the field later would break replay of in-flight workflows.
Either pin the name forever or include a `version` field from day one. + +## Swapping `LocalFsBlobStore` for a real object store + +The DataConverter is storage-agnostic: any class that implements `BlobStore` (two methods, `put` and `get`) will work. Swap `new LocalFsBlobStore()` in [`ClaimCheckWorker`](ClaimCheckWorker.java) for your own impl and the workflow/activity code stays the same. The header comment in [`ClaimCheckDataConverter.java`](ClaimCheckDataConverter.java) sketches an `S3BlobStore` using AWS SDK v2; brief pointers for other backends: + +| Backend | Dependency | Notes | +|---------|------------|-------| +| AWS S3 | `software.amazon.awssdk:s3:2.25.0` | Reference sketch in the converter's header comment. Credentials via standard AWS env vars or IAM role. | +| Google Cloud Storage | `com.google.cloud:google-cloud-storage` | `Storage.create(BlobInfo, byte[])` / `Storage.readAllBytes(BlobId)`. ADC for auth. | +| Azure Blob Storage | `com.azure:azure-storage-blob` | `BlobContainerClient.getBlobClient(key).upload(...)`. Connection string or `DefaultAzureCredential`. | +| MinIO / LocalStack / Cloudflare R2 | same as S3 (`awssdk:s3`) | Set `S3Client.builder().endpointOverride(URI.create("http://localhost:9000"))`. 
| + +## Source layout + +| File | Purpose | +|------|---------| +| [`BlobStore.java`](BlobStore.java) | Two-method abstraction over any object store | +| [`LocalFsBlobStore.java`](LocalFsBlobStore.java) | Zero-config implementation writing to the temp dir | +| [`ClaimCheckDataConverter.java`](ClaimCheckDataConverter.java) | The custom `DataConverter`; also contains backend pointers | +| [`ClaimCheckDataConverterWorkflow.java`](ClaimCheckDataConverterWorkflow.java) | Workflow + activity + sample `LargePayload` POJOs and generator | +| [`ClaimCheckWorker.java`](ClaimCheckWorker.java) | Worker main; wires the converter into `WorkflowClientOptions` and prints the stats banner | +| [`ClaimCheckStarter.java`](ClaimCheckStarter.java) | Thin async starter | diff --git a/src/main/java/com/uber/cadence/samples/s3offload/README.md b/src/main/java/com/uber/cadence/samples/s3offload/README.md deleted file mode 100644 index 2290247d..00000000 --- a/src/main/java/com/uber/cadence/samples/s3offload/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# S3 Offload (Claim-Check) DataConverter Sample - -A custom Cadence [`DataConverter`](../../../../../../../../README.md) that implements the **claim-check pattern**: payloads larger than a configurable threshold are stored in an external `BlobStore` (S3 / GCS / local disk) and only a small reference travels through Cadence workflow history. - -This solves Cadence's per-payload size limits (~2 MB) for workflows that pass very large datasets, and lowers history storage cost for long-running workflows that pass large repeatable data. - -- **Task list:** `data-s3` -- **Workflow type:** `S3OffloadDataConverterWorkflow` -- **Default threshold:** 4 KB (deliberately low so the demo always offloads) -- **Default backing store:** [`LocalFsBlobStore`](LocalFsBlobStore.java) writing to `${java.io.tmpdir}/cadence-java-samples-data-s3/` - -## Prerequisites - -1. Cadence server running (e.g. 
Docker Compose from the [Cadence repo](https://github.com/uber/cadence)). -2. From the repo root, build: `./gradlew build`. - -### Register the domain (required once per cluster) - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.common.RegisterDomain -``` - -Or with the Cadence CLI: - -```bash -cadence --domain samples-domain domain register -``` - -## Run the worker (terminal 1) - -The worker prints an S3-offload statistics banner showing how much was offloaded to the blob store vs how little ends up in Cadence history, then begins polling the `data-s3` task list: - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadWorker -``` - -## Start a workflow (terminal 2) - -```bash -./gradlew -q execute -PmainClass=com.uber.cadence.samples.s3offload.S3OffloadStarter -``` - -Or from the Cadence CLI: - -```bash -cadence --domain samples-domain \ - workflow start \ - --workflow_type S3OffloadDataConverterWorkflow \ - --tl data-s3 \ - --et 60 -``` - -## How it works - -- `toData`: JSON-encode the arguments with the standard `JsonDataConverter`. If the resulting bytes are at or below the threshold, write `0x00 || json` and return inline. Otherwise compute a SHA-256 of the bytes, `PUT` to the blob store under `/`, and return `0x01 || json({"s3Ref":"/"})`. Using the content hash as the key makes `toData` idempotent across Cadence workflow replays. -- `fromData` / `fromDataArray`: read the 1-byte prefix; inline payloads pass straight to `JsonDataConverter`, offloaded payloads first fetch the blob via `BlobStore.get`. -- Cleanup: this sample does not delete blobs after the workflow completes. In production, use S3 object lifecycle policies to expire old blobs automatically. - -## Swapping `LocalFsBlobStore` for real S3 - -The header comment in [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) sketches an `S3BlobStore` implementation using AWS SDK v2: - -1. Add `software.amazon.awssdk:s3:2.25.0` to `build.gradle`. 
-2. Implement `BlobStore` against `software.amazon.awssdk.services.s3.S3Client`. -3. Replace `new LocalFsBlobStore()` with `new S3BlobStore("my-bucket", "us-east-1")` in [`S3OffloadWorker`](S3OffloadWorker.java). -4. Provide credentials via standard AWS env vars or an IAM instance role. - -Point the SDK at LocalStack or MinIO for local testing without a real AWS account. - -## Source layout - -| File | Purpose | -|------|---------| -| [`BlobStore.java`](BlobStore.java) | Two-method abstraction over any object store | -| [`LocalFsBlobStore.java`](LocalFsBlobStore.java) | Zero-config implementation writing to the temp dir | -| [`S3OffloadDataConverter.java`](S3OffloadDataConverter.java) | The custom `DataConverter`; also contains the S3 stub | -| [`S3OffloadDataConverterWorkflow.java`](S3OffloadDataConverterWorkflow.java) | Workflow + activity + sample `S3LargePayload` POJOs and generator | -| [`S3OffloadWorker.java`](S3OffloadWorker.java) | Worker main; wires the converter into `WorkflowClientOptions` and prints the stats banner | -| [`S3OffloadStarter.java`](S3OffloadStarter.java) | Thin async starter | diff --git a/src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java b/src/test/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverterTest.java similarity index 77% rename from src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java rename to src/test/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverterTest.java index f8e9d310..ffce666c 100644 --- a/src/test/java/com/uber/cadence/samples/s3offload/S3OffloadDataConverterTest.java +++ b/src/test/java/com/uber/cadence/samples/claimcheck/ClaimCheckDataConverterTest.java @@ -15,7 +15,7 @@ * permissions and limitations under the License. 
*/ -package com.uber.cadence.samples.s3offload; +package com.uber.cadence.samples.claimcheck; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ -33,42 +33,42 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; -public class S3OffloadDataConverterTest { +public class ClaimCheckDataConverterTest { @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); @Test - public void testS3OffloadConverterInlinesBelowThreshold() { + public void testClaimCheckConverterInlinesBelowThreshold() { RecordingBlobStore store = new RecordingBlobStore(); - S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1024); + ClaimCheckDataConverter converter = new ClaimCheckDataConverter(store, "bucket", 1024); byte[] encoded = converter.toData("small"); String decoded = converter.fromData(encoded, String.class, String.class); - assertEquals(S3OffloadDataConverter.INLINE_PREFIX, encoded[0]); + assertEquals(ClaimCheckDataConverter.INLINE_PREFIX, encoded[0]); assertEquals("small", decoded); assertTrue(store.blobs.isEmpty()); } @Test - public void testS3OffloadConverterOffloadsAndUsesIdempotentReference() { + public void testClaimCheckConverterOffloadsAndUsesIdempotentReference() { RecordingBlobStore store = new RecordingBlobStore(); - S3OffloadDataConverter converter = new S3OffloadDataConverter(store, "bucket", 1); + ClaimCheckDataConverter converter = new ClaimCheckDataConverter(store, "bucket", 1); byte[] first = converter.toData("large enough to offload"); byte[] second = converter.toData("large enough to offload"); String decoded = converter.fromData(first, String.class, String.class); - assertEquals(S3OffloadDataConverter.OFFLOAD_PREFIX, first[0]); + assertEquals(ClaimCheckDataConverter.OFFLOAD_PREFIX, first[0]); assertArrayEquals(first, second); assertEquals("large enough to offload", decoded); assertEquals(1, store.blobs.size()); } @Test - public void 
testS3OffloadConverterRejectsUnknownPrefix() { - S3OffloadDataConverter converter = - new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", 1); + public void testClaimCheckConverterRejectsUnknownPrefix() { + ClaimCheckDataConverter converter = + new ClaimCheckDataConverter(new RecordingBlobStore(), "bucket", 1); try { converter.fromData(new byte[] {0x7f}, String.class, String.class); @@ -79,10 +79,11 @@ public void testS3OffloadConverterRejectsUnknownPrefix() { } @Test - public void testS3OffloadConverterValidatesConstructorInputs() { - expectIllegalArgument(() -> new S3OffloadDataConverter(null, "bucket", 1)); - expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), " ", 1)); - expectIllegalArgument(() -> new S3OffloadDataConverter(new RecordingBlobStore(), "bucket", -1)); + public void testClaimCheckConverterValidatesConstructorInputs() { + expectIllegalArgument(() -> new ClaimCheckDataConverter(null, "bucket", 1)); + expectIllegalArgument(() -> new ClaimCheckDataConverter(new RecordingBlobStore(), " ", 1)); + expectIllegalArgument( + () -> new ClaimCheckDataConverter(new RecordingBlobStore(), "bucket", -1)); } @Test