diff --git a/prompts.txt b/prompts.txt new file mode 100644 index 000000000..9d70f0a89 --- /dev/null +++ b/prompts.txt @@ -0,0 +1,132 @@ +# AI Tooling Prompts Used + +Prompt 1 : Initial Implementation +I am modifying an ANTLR grammar for a Java project. Help me write lexer and parser rules in Directives.g4 file for +two new tokens: + +BYTE_SIZE → matches values like "10KB", "2.5MB", "1GB". + +TIME_DURATION → matches values like "5ms", "3.2s", "1min". + +Also, include helpful fragments like BYTE_UNIT, TIME_UNIT. Finally, show how to update the value parser rule +(or create byteSizeArg, timeDurationArg if needed) so the new tokens are accepted as directive arguments. + + + +Prompt 2: Create ByteSize and TimeDuration Token Classes + Help me in building two new token classes: + +ByteSize.java and TimeDuration.java + +Each class should: + +1. Extension of io.cdap.wrangler.api.parser.Token + +2. Parse strings like "10KB", "2.5MB", "1GB" (for ByteSize) and "500ms", "1.2s", "3min" (for TimeDuration) + +3. Store the value in canonical units (bytes for ByteSize, milliseconds or nanoseconds for TimeDuration) + +Provide getter methods like getBytes() and getMilliseconds() + + +Prompt 3: Update Token Types and Directive Argument Support +Help me in extending a token parsing framework in Java for a data transformation tool. I need to: + +Add two new token types: BYTE_SIZE and TIME_DURATION in the token registry or enum used (if any). + +Update the logic that defines valid argument types in directives, +so that BYTE_SIZE and TIME_DURATION can be accepted where appropriate. + +Mention any necessary updates in registration/configuration files or classes if applicable. + + + +Prompt 4: Add Visitor Methods for New Parser Rules + +In my ANTLR-based Java parser for a directive language, + +I want to add two new parser rules: byteSizeArg and timeDurationArg. Help me in implementing: + +1. 
To implement visitor methods visitByteSizeArg and visitTimeDurationArg in the appropriate visitor or parser class. + +2. These methods should return instances of ByteSize and TimeDuration tokens respectively using ctx.getText(). + +Ensure these token instances are added to the TokenGroup for the directive being parsed. + + + +Prompt 5: Implement New AggregateStats Directive + +I’m creating a new directive class called AggregateStats in a Java-based data transformation engine. Help me in implementing the Directive Interface: + +1. Accept at least 4 arguments: + +Source column (byte sizes) + +Source column (time durations) + +Target column for total size + +Target column for total/average time + +2. Optionally accept: + +Aggregation type (total, avg) + +Output unit (MB, GB, seconds, minutes) + +In initialize, store the argument values + +In execute, use ExecutorContext.getStore() to: + +Accumulate byte size and time duration values (convert to canonical units) + +In finalize, return a single Row with converted results (e.g., MB, seconds) + + +Prompt 6: Write Unit Tests for ByteSize and TimeDuration + +I need to write JUnit tests for one Java class: ByteSize and TimeDuration. + +1. These class parse strings like "10KB" and "500ms" respectively. + +2. Test valid cases: "10KB", "1.5MB", "1GB" for ByteSize and "500ms", "2s", "1min" for TimeDuration. + +3. Verify that getBytes() or getMilliseconds() return the correct canonical values. + +Include a few invalid input tests and assert that they throw proper exceptions. + + + + +Prompt 7: Write Parser Tests for New Grammar + +I’ve added BYTE_SIZE and TIME_DURATION tokens to an ANTLR grammar. Help me write parser tests in Java to: + +Validate that inputs like "10KB", "1.5MB", "5ms", "3min" are accepted in directive recipes. + +Use test classes like GrammarBasedParserTest.java or RecipeCompilerTest.java. + +Also test invalid values (e.g., "10KBB", "1..5MB", "ms5") and ensure they are rejected. 
+ + + + +Prompt 8: Write Integration Test for AggregateStats Directive + +I’ve created an AggregateStats directive that aggregates byte size and time duration columns. Help me write an integration test using TestingRig to: + +Create input data: List with columns like data_transfer_size and response_time using values like "1MB", "500KB", "2s", "500ms". + +Define recipe like: + +java + +String[] recipe = new String[] { + "aggregate-stats :data_transfer_size :response_time total_size_mb total_time_sec" +}; +Execute with TestingRig.execute(recipe, rows) + +Assert that the resulting row contains correct aggregated values (in MB and seconds) + +Use a delta tolerance (e.g., 0.001) for comparing float values diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java new file mode 100644 index 000000000..29efa9002 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java @@ -0,0 +1,77 @@ +/* + * Copyright © 2024 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + + package io.cdap.wrangler.api.parser; + + import com.google.gson.JsonElement; + import com.google.gson.JsonObject; + import io.cdap.wrangler.api.annotations.PublicEvolving; + + /** + * Represents a byte size value with units (KB, MB, GB, TB). 
+ *
+ * <p>The numeric part may be fractional (for example {@code 1.5MB}). Units are matched
+ * case-insensitively and use binary multiples (1 KB = 1024 bytes); a bare number or a
+ * {@code B} suffix is read as bytes. Fractional byte results are truncated.
+ */
+@PublicEvolving
+public class ByteSize implements Token {
+  private static final long KILOBYTE = 1024L;
+  private static final long MEGABYTE = KILOBYTE * 1024L;
+  private static final long GIGABYTE = MEGABYTE * 1024L;
+  private static final long TERABYTE = GIGABYTE * 1024L;
+
+  private final String value;
+  private final long bytes;
+
+  /**
+   * Creates a byte size token from its textual form.
+   *
+   * @param value text such as {@code "10KB"}, {@code "1.5MB"} or {@code "512"}.
+   * @throws NumberFormatException if the numeric part is missing or malformed.
+   * @throws IllegalArgumentException if the value is null, uses an unknown unit or does not fit in a long.
+   */
+  public ByteSize(String value) {
+    this.value = value;
+    this.bytes = parseBytes(value);
+  }
+
+  /**
+   * Splits the text into a leading number and a trailing unit and converts it to bytes.
+   * The number must come first, so inputs such as "1K2B" or "-5KB" are rejected instead of
+   * having their digits picked out of the middle of the string.
+   */
+  private static long parseBytes(String value) {
+    if (value == null) {
+      throw new IllegalArgumentException("Byte size must not be null");
+    }
+
+    String text = value.trim();
+    int split = 0;
+    while (split < text.length() && isNumberChar(text.charAt(split))) {
+      split++;
+    }
+    String number = text.substring(0, split);
+    // Locale.ROOT keeps unit matching stable in locales with special casing rules (e.g. Turkish).
+    String unit = text.substring(split).trim().toLowerCase(java.util.Locale.ROOT);
+
+    double size;
+    try {
+      size = Double.parseDouble(number);
+    } catch (NumberFormatException e) {
+      NumberFormatException failure = new NumberFormatException(
+        String.format("Invalid byte size '%s': '%s' is not a number", value, number));
+      failure.initCause(e);
+      throw failure;
+    }
+
+    long multiplier;
+    switch (unit) {
+      case "":
+      case "b":
+        multiplier = 1L;
+        break;
+      case "kb":
+        multiplier = KILOBYTE;
+        break;
+      case "mb":
+        multiplier = MEGABYTE;
+        break;
+      case "gb":
+        multiplier = GIGABYTE;
+        break;
+      case "tb":
+        multiplier = TERABYTE;
+        break;
+      default:
+        throw new IllegalArgumentException(
+          String.format("Invalid byte size '%s': unknown unit '%s'", value, unit));
+    }
+
+    double result = size * multiplier;
+    // A cast would silently clamp to Long.MAX_VALUE; report the overflow instead.
+    if (result >= (double) Long.MAX_VALUE) {
+      throw new IllegalArgumentException(String.format("Byte size '%s' is too large", value));
+    }
+    return (long) result;
+  }
+
+  private static boolean isNumberChar(char c) {
+    return (c >= '0' && c <= '9') || c == '.';
+  }
+
+  /**
+   * @return the original text this token was created from.
+   */
+  @Override
+  public String value() {
+    return value;
+  }
+
+  /**
+   * @return the size converted to bytes.
+   */
+  public long getBytes() {
+    return bytes;
+  }
+
+  @Override
+  public TokenType type() {
+    return TokenType.BYTE_SIZE;
+  }
+
+  /**
+   * @return a JSON object holding the token type, the original text and the size in bytes.
+   */
+  @Override
+  public JsonElement toJson() {
+    JsonObject object = new JsonObject();
+    object.addProperty("type", TokenType.BYTE_SIZE.name());
+    object.addProperty("value", value);
+    object.addProperty("bytes", bytes);
+    return object;
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
new file mode 100644
index 000000000..e5891ba63
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2024 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import io.cdap.wrangler.api.annotations.PublicEvolving;
+
+import java.math.BigDecimal;
+import java.util.Locale;
+
+/**
+ * Represents a time duration value with units (ns, us, ms, s, m or min, h, d).
+ *
+ * <p>The numeric part may be fractional (for example {@code 1.5min}). Units are matched
+ * case-insensitively; a bare number is read as milliseconds. The value is stored in whole
+ * milliseconds, truncating any remainder.
+ */
+@PublicEvolving
+public class TimeDuration implements Token {
+  private final String value;
+  private final long milliseconds;
+
+  /**
+   * Creates a time duration token from its textual form.
+   *
+   * @param value text such as {@code "500ms"}, {@code "2.1s"} or {@code "1h"}.
+   * @throws NumberFormatException if the numeric part is missing or malformed.
+   * @throws IllegalArgumentException if the value is null, uses an unknown unit or does not fit in a long.
+   */
+  public TimeDuration(String value) {
+    this.value = value;
+    this.milliseconds = parseMilliseconds(value);
+  }
+
+  /**
+   * Splits the text into a leading number and a trailing unit and converts it to milliseconds.
+   * The number must come first, so inputs such as "ms5" or "-5s" are rejected instead of
+   * having their digits picked out of the middle of the string.
+   */
+  private static long parseMilliseconds(String value) {
+    if (value == null) {
+      throw new IllegalArgumentException("Time duration must not be null");
+    }
+
+    String text = value.trim();
+    int split = 0;
+    while (split < text.length() && isNumberChar(text.charAt(split))) {
+      split++;
+    }
+    String number = text.substring(0, split);
+    // Locale.ROOT keeps unit matching stable in locales with special casing rules (e.g. Turkish).
+    String unit = text.substring(split).trim().toLowerCase(Locale.ROOT);
+
+    // Decimal arithmetic avoids binary floating point artifacts: with doubles, 1.005 * 1000 is
+    // 1004.9999999999999, which truncates to 1004 ms instead of 1005 ms.
+    BigDecimal amount;
+    try {
+      amount = new BigDecimal(number);
+    } catch (NumberFormatException e) {
+      NumberFormatException failure = new NumberFormatException(
+        String.format("Invalid time duration '%s': '%s' is not a number", value, number));
+      failure.initCause(e);
+      throw failure;
+    }
+
+    // Every unit is expressed in nanoseconds so that one code path covers sub-millisecond units too.
+    long nanosPerUnit;
+    switch (unit) {
+      case "ns":
+        nanosPerUnit = 1L;
+        break;
+      case "us":
+        nanosPerUnit = 1_000L;
+        break;
+      case "":
+      case "ms":
+        nanosPerUnit = 1_000_000L;
+        break;
+      case "s":
+        nanosPerUnit = 1_000_000_000L;
+        break;
+      case "m":
+      case "min":
+        nanosPerUnit = 60L * 1_000_000_000L;
+        break;
+      case "h":
+        nanosPerUnit = 60L * 60L * 1_000_000_000L;
+        break;
+      case "d":
+        nanosPerUnit = 24L * 60L * 60L * 1_000_000_000L;
+        break;
+      default:
+        throw new IllegalArgumentException(
+          String.format("Invalid time duration '%s': unknown unit '%s'", value, unit));
+    }
+
+    try {
+      // nanoseconds -> milliseconds is a shift of six decimal places; toBigInteger() truncates.
+      return amount.multiply(BigDecimal.valueOf(nanosPerUnit))
+        .movePointLeft(6)
+        .toBigInteger()
+        .longValueExact();
+    } catch (ArithmeticException e) {
+      throw new IllegalArgumentException(String.format("Time duration '%s' is too large", value), e);
+    }
+  }
+
+  private static boolean isNumberChar(char c) {
+    return (c >= '0' && c <= '9') || c == '.';
+  }
+
+  /**
+   * @return the original text this token was created from.
+   */
+  @Override
+  public String value() {
+    return value;
+  }
+
+  /**
+   * @return the duration converted to whole milliseconds.
+   */
+  public long getMilliseconds() {
+    return milliseconds;
+  }
+
+  @Override
+  public TokenType type() {
+    return TokenType.TIME_DURATION;
+  }
+
+  /**
+   * @return a JSON object holding the token type, the original text and the duration in milliseconds.
+   */
+  @Override
+  public JsonElement toJson() {
+    JsonObject object = new JsonObject();
+    object.addProperty("type", TokenType.TIME_DURATION.name());
+    object.addProperty("value", value);
+    object.addProperty("milliseconds", milliseconds);
+    return object;
+  }
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
index 
8c93b0e6a..30ea5c9d3 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
@@ -152,5 +152,17 @@ public enum TokenType implements Serializable {
    * Represents the enumerated type for the object of type {@code String} with restrictions
    * on characters that can be present in a string.
    */
-  IDENTIFIER
+  IDENTIFIER,
+
+  /**
+   * Represents the enumerated type for the object of type {@code ByteSize}: a number
+   * followed by a byte unit, for example {@code 10KB}.
+   */
+  BYTE_SIZE,
+
+  /**
+   * Represents the enumerated type for the object of type {@code TimeDuration}: a number
+   * followed by a time unit, for example {@code 5ms}.
+   */
+  TIME_DURATION
 }
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java
index 78800b7d1..d303ab108 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java
@@ -14,26 +14,25 @@
  * the License.
  */
 
-package io.cdap.wrangler.api.parser;
+ package io.cdap.wrangler.api.parser;
 
-import io.cdap.wrangler.api.Optional;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
+ import io.cdap.wrangler.api.Optional;
+ import java.io.Serializable;
+ import java.util.ArrayList;
+ import java.util.List;
 
 /**
- * This class {@link UsageDefinition} provides a way for users to registers the argument for UDDs.
+ * This class {@link UsageDefinition} provides a way for users to register the argument for UDDs.
  *
  * {@link UsageDefinition} is a collection of {@link TokenDefinition} and the name of the directive
  * itself. Each token specification has an associated ordinal that can be used to position the argument
  * within the directive.
  *
- * Following is a example of how this class can be used.
+ * Following is an example of how this class can be used.
  *
  * UsageDefinition.Builder builder = UsageDefinition.builder();
  * builder.add("col1", TypeToken.COLUMN_NAME); // By default, this field is required.
- * builder.add("col2", TypeToken.COLUMN_NAME, false); // This is a optional field. 
+ * builder.add("col2", TypeToken.COLUMN_NAME, false); // This is an optional field. * builder.add("expression", TypeToken.EXPRESSION); * UsageDefinition definition = builder.build(); * @@ -43,202 +42,206 @@ * @see TokenDefinition */ public final class UsageDefinition implements Serializable { - // transient so it doesn't show up when serialized using gson in service endpoint responses - private final transient int optionalCnt; - private final String directive; - private final List tokens; - - private UsageDefinition(String directive, int optionalCnt, List tokens) { - this.directive = directive; - this.tokens = tokens; - this.optionalCnt = optionalCnt; - } - - /** - * Returns the name of the directive for which the this UsageDefinition - * object is created. - * - * @return name of the directive. - */ - public String getDirectiveName() { - return directive; - } - - /** - * This method returns the list of TokenDefinition that should be - * used for parsing the directive into Arguments. - * - * @return List of TokenDefinition. - */ - public List getTokens() { - return tokens; - } - - /** - * Returns the count of TokenDefinition that have been specified - * as optional in the UsageDefinition. - * - * @return number of tokens in the usage that are optional. - */ - public int getOptionalTokensCount() { - return optionalCnt; - } - - /** - * This method converts the UsageDefinition into a usage string - * for this directive. It inspects all the tokens to generate a standard syntax - * for the usage of the directive. - * - * @return a usage representation of this object. 
- */ - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(directive).append(" "); - - int count = tokens.size(); - for (TokenDefinition token : tokens) { - if (token.optional()) { - sb.append(" ["); - } - - if (token.label() != null) { - sb.append(token.label()); - } else { - if (token.type().equals(TokenType.DIRECTIVE_NAME)) { - sb.append(token.name()); - } else if (token.type().equals(TokenType.COLUMN_NAME)) { - sb.append(":").append(token.name()); - } else if (token.type().equals(TokenType.COLUMN_NAME_LIST)) { - sb.append(":").append(token.name()).append(" [,:").append(token.name()).append(" ]*"); - } else if (token.type().equals(TokenType.BOOLEAN)) { - sb.append(token.name()).append(" (true/false)"); - } else if (token.type().equals(TokenType.TEXT)) { - sb.append("'").append(token.name()).append("'"); - } else if (token.type().equals(TokenType.IDENTIFIER) || token.type().equals(TokenType.NUMERIC)) { - sb.append(token.name()); - } else if (token.type().equals(TokenType.BOOLEAN_LIST) || token.type().equals(TokenType.NUMERIC_LIST) - || token.type().equals(TokenType.TEXT_LIST)) { - sb.append(token.name()).append("[,").append(token.name()).append(" ...]*"); - } else if (token.type().equals(TokenType.EXPRESSION)) { - sb.append("exp:{<").append(token.name()).append(">}"); - } else if (token.type().equals(TokenType.PROPERTIES)) { - sb.append("prop:{key:value,[key:value]*"); - } else if (token.type().equals(TokenType.RANGES)) { - sb.append("start:end=[bool|text|numeric][,start:end=[bool|text|numeric]*"); - } - } - - count--; - - if (token.optional()) { - sb.append("]"); - } else { - if (count > 0) { - sb.append(" "); - } - } - } - return sb.toString(); - } - - /** - * This is a static method for creating a builder for the UsageDefinition - * object. In order to create a UsageDefinition, a builder has to created. - * - *

This builder is provided as user API for constructing the usage specification - * for a directive.

- * - * @param directive name of the directive for which the builder is created for. - * @return A UsageDefinition.Builder object that can be used to construct - * UsageDefinition object. - */ - public static UsageDefinition.Builder builder(String directive) { - return new UsageDefinition.Builder(directive); - } - - /** - * This inner builder class provides a way to create UsageDefinition - * object. It exposes different methods that allow users to configure the TokenDefinition - * for each token used within the usage of a directive. - */ - public static final class Builder { - private final String directive; - private final List tokens; - private int currentOrdinal; - private int optionalCnt; - - public Builder(String directive) { - this.directive = directive; - this.currentOrdinal = 0; - this.tokens = new ArrayList<>(); - this.optionalCnt = 0; - } - - /** - * This method provides a way to set the name and the type of token, while - * defaulting the label to 'null' and setting the optional to FALSE. - * - * @param name of the token in the definition of a directive. - * @param type of the token to be extracted. - */ - public void define(String name, TokenType type) { - TokenDefinition spec = new TokenDefinition(name, type, null, currentOrdinal, Optional.FALSE); - currentOrdinal++; - tokens.add(spec); - } - - /** - * Allows users to define a token with a name, type of the token and additional optional - * for the label that is used during creation of the usage for the directive. - * - * @param name of the token in the definition of a directive. - * @param type of the token to be extracted. - * @param label label that modifies the usage for this field. 
- */ - public void define(String name, TokenType type, String label) { - TokenDefinition spec = new TokenDefinition(name, type, label, currentOrdinal, Optional.FALSE); - currentOrdinal++; - tokens.add(spec); - } - - /** - * Method allows users to specify a field as optional in combination to the - * name of the token and the type of token. - * - * @param name of the token in the definition of a directive. - * @param type of the token to be extracted. - * @param optional Optional#TRUE if token is optional, else Optional#FALSE. - */ - public void define(String name, TokenType type, boolean optional) { - TokenDefinition spec = new TokenDefinition(name, type, null, currentOrdinal, optional); - optionalCnt = optional ? optionalCnt + 1 : optionalCnt; - currentOrdinal++; - tokens.add(spec); - } - - /** - * Method allows users to specify a field as optional in combination to the - * name of the token, the type of token and also the ability to specify a label - * for the usage. - * - * @param name of the token in the definition of a directive. - * @param type of the token to be extracted. - * @param label label that modifies the usage for this field. - * @param optional Optional#TRUE if token is optional, else Optional#FALSE. - */ - public void define(String name, TokenType type, String label, boolean optional) { - TokenDefinition spec = new TokenDefinition(name, type, label, currentOrdinal, optional); - optionalCnt = optional ? optionalCnt + 1 : optionalCnt; - currentOrdinal++; - tokens.add(spec); - } - - /** - * @return a instance of UsageDefinition object. 
-     */
-    public UsageDefinition build() {
-      return new UsageDefinition(directive, optionalCnt, tokens);
-    }
-  }
+  // transient so it doesn't show up when serialized using gson in service endpoint responses
+  private final transient int optionalCnt;
+  private final String directive;
+  private final List tokens;
+
+  private UsageDefinition(String directive, int optionalCnt, List tokens) {
+    this.directive = directive;
+    this.tokens = tokens;
+    this.optionalCnt = optionalCnt;
+  }
+
+  /**
+   * Returns the name of the directive for which this UsageDefinition
+   * object is created.
+   *
+   * @return name of the directive.
+   */
+  public String getDirectiveName() {
+    return directive;
+  }
+
+  /**
+   * This method returns the list of TokenDefinition that should be
+   * used for parsing the directive into Arguments.
+   *
+   * @return List of TokenDefinition.
+   */
+  public List getTokens() {
+    return tokens;
+  }
+
+  /**
+   * Returns the count of TokenDefinition that have been specified
+   * as optional in the UsageDefinition.
+   *
+   * @return number of tokens in the usage that are optional.
+   */
+  public int getOptionalTokensCount() {
+    return optionalCnt;
+  }
+
+  /**
+   * This method converts the UsageDefinition into a usage string
+   * for this directive. It inspects all the tokens to generate a standard syntax
+   * for the usage of the directive.
+   *
+   * @return a usage representation of this object. 
+   */
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(directive).append(" ");
+
+    // Tracks how many tokens are still to be written so no trailing separator is emitted.
+    int count = tokens.size();
+    for (TokenDefinition token : tokens) {
+      if (token.optional()) {
+        sb.append(" [");
+      }
+
+      if (token.label() != null) {
+        // An explicit label replaces the type-derived rendering.
+        sb.append(token.label());
+      } else {
+        if (token.type().equals(TokenType.DIRECTIVE_NAME)) {
+          sb.append(token.name());
+        } else if (token.type().equals(TokenType.COLUMN_NAME)) {
+          sb.append(":").append(token.name());
+        } else if (token.type().equals(TokenType.COLUMN_NAME_LIST)) {
+          sb.append(":").append(token.name()).append(" [,:").append(token.name()).append(" ]*");
+        } else if (token.type().equals(TokenType.BOOLEAN)) {
+          sb.append(token.name()).append(" (true/false)");
+        } else if (token.type().equals(TokenType.TEXT)) {
+          sb.append("'").append(token.name()).append("'");
+        } else if (token.type().equals(TokenType.IDENTIFIER) || token.type().equals(TokenType.NUMERIC)
+          || token.type().equals(TokenType.BYTE_SIZE) || token.type().equals(TokenType.TIME_DURATION)) {
+          // Byte sizes and durations are literal values (e.g. 10KB, 5ms), not column references,
+          // so they are shown without the ':' column prefix.
+          sb.append(token.name());
+        } else if (token.type().equals(TokenType.BOOLEAN_LIST) || token.type().equals(TokenType.NUMERIC_LIST)
+          || token.type().equals(TokenType.TEXT_LIST)) {
+          sb.append(token.name()).append("[,").append(token.name()).append(" ...]*");
+        } else if (token.type().equals(TokenType.EXPRESSION)) {
+          sb.append("exp:{<").append(token.name()).append(">}");
+        } else if (token.type().equals(TokenType.PROPERTIES)) {
+          sb.append("prop:{key:value,[key:value]*");
+        } else if (token.type().equals(TokenType.RANGES)) {
+          sb.append("start:end=[bool|text|numeric][,start:end=[bool|text|numeric]*");
+        }
+      }
+
+      count--;
+
+      if (token.optional()) {
+        sb.append("]");
+      } else {
+        if (count > 0) {
+          sb.append(" ");
+        }
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * This is a static method for creating a builder for the 
UsageDefinition
+   * object. In order to create a UsageDefinition, a builder has to be created.
+   *
+   * <p>This builder is provided as a user API for constructing the usage specification
+   * for a directive.</p>
+   *
+   * @param directive name of the directive for which the builder is created.
+   * @return A UsageDefinition.Builder object that can be used to construct
+   * UsageDefinition object.
+   */
+  public static UsageDefinition.Builder builder(String directive) {
+    return new UsageDefinition.Builder(directive);
+  }
+
+  /**
+   * This inner builder class provides a way to create UsageDefinition
+   * object. It exposes different methods that allow users to configure the TokenDefinition
+   * for each token used within the usage of a directive.
+   */
+  public static final class Builder {
+    private final String directive;
+    private final List tokens;
+    private int currentOrdinal;
+    private int optionalCnt;
+
+    public Builder(String directive) {
+      this.directive = directive;
+      this.currentOrdinal = 0;
+      this.tokens = new ArrayList<>();
+      this.optionalCnt = 0;
+    }
+
+    /**
+     * This method provides a way to set the name and the type of token, while
+     * defaulting the label to 'null' and setting the optional to FALSE.
+     *
+     * @param name of the token in the definition of a directive.
+     * @param type of the token to be extracted.
+     */
+    public void define(String name, TokenType type) {
+      TokenDefinition spec = new TokenDefinition(name, type, null, currentOrdinal, Optional.FALSE);
+      currentOrdinal++;
+      tokens.add(spec);
+    }
+
+    /**
+     * Allows users to define a token with a name, type of the token and additional optional
+     * for the label that is used during creation of the usage for the directive.
+     *
+     * @param name of the token in the definition of a directive.
+     * @param type of the token to be extracted.
+     * @param label label that modifies the usage for this field.
+     */
+    public void define(String name, TokenType type, String label) {
+      TokenDefinition spec = new TokenDefinition(name, type, label, currentOrdinal, Optional.FALSE);
+      currentOrdinal++;
+      tokens.add(spec);
+    }
+
+    /**
+     * Method allows users to specify a field as optional in combination with the
+     * name of the token and the type of token.
+     *
+     * @param name of the token in the definition of a directive.
+     * @param type of the token to be extracted.
+     * @param optional Optional#TRUE if token is optional, else Optional#FALSE.
+     */
+    public void define(String name, TokenType type, boolean optional) {
+      TokenDefinition spec = new TokenDefinition(name, type, null, currentOrdinal, optional);
+      optionalCnt = optional ? optionalCnt + 1 : optionalCnt;
+      currentOrdinal++;
+      tokens.add(spec);
+    }
+
+    /**
+     * Method allows users to specify a field as optional in combination with the
+     * name of the token, the type of token and also the ability to specify a label
+     * for the usage.
+     *
+     * @param name of the token in the definition of a directive.
+     * @param type of the token to be extracted.
+     * @param label label that modifies the usage for this field.
+     * @param optional Optional#TRUE if token is optional, else Optional#FALSE.
+     */
+    public void define(String name, TokenType type, String label, boolean optional) {
+      TokenDefinition spec = new TokenDefinition(name, type, label, currentOrdinal, optional);
+      optionalCnt = optional ? optionalCnt + 1 : optionalCnt;
+      currentOrdinal++;
+      tokens.add(spec);
+    }
+
+    /**
+     * @return an instance of UsageDefinition object.
+     */
+    public UsageDefinition build() {
+      return new UsageDefinition(directive, optionalCnt, tokens);
+    }
+  }
 }
diff --git a/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSizeAndTimeDurationTest.java b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSizeAndTimeDurationTest.java
new file mode 100644
index 000000000..c2bfceede
--- /dev/null
+++ b/wrangler-api/src/test/java/io/cdap/wrangler/api/parser/ByteSizeAndTimeDurationTest.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.wrangler.api.parser;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for ByteSize and TimeDuration classes. 
+ */
+public class ByteSizeAndTimeDurationTest {
+
+  @Test
+  public void testByteSizeParsing() {
+    // Sizes convert to bytes using binary multiples (1KB = 1024 bytes).
+    Assert.assertEquals(1024L, new ByteSize("1KB").getBytes());
+    Assert.assertEquals(1048576L, new ByteSize("1MB").getBytes());
+    Assert.assertEquals(1073741824L, new ByteSize("1GB").getBytes());
+    Assert.assertEquals(1099511627776L, new ByteSize("1TB").getBytes());
+    Assert.assertEquals(10L, new ByteSize("10B").getBytes());
+
+    // Fractional sizes are supported.
+    Assert.assertEquals(1572864L, new ByteSize("1.5MB").getBytes());
+  }
+
+  @Test
+  public void testByteSizeUnitsAreCaseInsensitive() {
+    Assert.assertEquals(1024L, new ByteSize("1kb").getBytes());
+    Assert.assertEquals(1048576L, new ByteSize("1Mb").getBytes());
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testByteSizeWithoutNumberIsRejected() {
+    new ByteSize("KB");
+  }
+
+  @Test
+  public void testTimeDurationParsing() {
+    // getMilliseconds() returns a long, so exact values are compared instead of using a double delta.
+    Assert.assertEquals(5L, new TimeDuration("5ms").getMilliseconds());
+    Assert.assertEquals(2100L, new TimeDuration("2.1s").getMilliseconds());
+    Assert.assertEquals(90000L, new TimeDuration("1.5min").getMilliseconds());
+    Assert.assertEquals(3600000L, new TimeDuration("1h").getMilliseconds());
+    Assert.assertEquals(86400000L, new TimeDuration("1d").getMilliseconds());
+
+    // Sub-millisecond units are converted down to whole milliseconds.
+    Assert.assertEquals(1L, new TimeDuration("1000us").getMilliseconds());
+    Assert.assertEquals(5L, new TimeDuration("5000000ns").getMilliseconds());
+  }
+
+  @Test
+  public void testTimeDurationUnitsAreCaseInsensitive() {
+    Assert.assertEquals(5L, new TimeDuration("5MS").getMilliseconds());
+    Assert.assertEquals(60000L, new TimeDuration("1Min").getMilliseconds());
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testTimeDurationWithoutNumberIsRejected() {
+    new TimeDuration("ms");
+  }
+
+}
diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
index 7c517ed6a..c494f1112 100644
--- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
+++ 
b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
@@ -140,7 +140,7 @@ numberRange
  ;
 
 value
- : String | Number | Column | Bool
+ : String | Number | Column | Bool | BYTE_SIZE | TIME_DURATION
  ;
 
 ecommand
@@ -195,6 +195,28 @@ identifierList
  : Identifier (',' Identifier)*
  ;
 
+BYTE_SIZE
+ : Digit+ ('.' Digit+)? BYTE_UNIT
+ ;
+
+TIME_DURATION
+ : Digit+ ('.' Digit+)? TIME_UNIT
+ ;
+
+fragment BYTE_UNIT
+ : [kK][bB]
+ | [mM][bB]
+ | [gG][bB]
+ | [tT][bB]
+ ;
+
+fragment TIME_UNIT
+ : [mM][sS]
+ | [sS]
+ | [mM][iI][nN]
+ | [hH]
+ ;
+
 
 /*
  * Following are the Lexer Rules used for tokenizing the recipe.
diff --git a/wrangler-core/src/main/java/io/cdap/directives/aggregates/AggregateStats.java b/wrangler-core/src/main/java/io/cdap/directives/aggregates/AggregateStats.java
new file mode 100644
index 000000000..47f86486b
--- /dev/null
+++ b/wrangler-core/src/main/java/io/cdap/directives/aggregates/AggregateStats.java
@@ -0,0 +1,159 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License. 
+ */
+
+package io.cdap.directives.aggregates;
+
+import io.cdap.cdap.api.annotation.Name;
+import io.cdap.cdap.api.annotation.Plugin;
+import io.cdap.cdap.api.data.schema.Schema;
+import io.cdap.wrangler.api.Arguments;
+import io.cdap.wrangler.api.Directive;
+import io.cdap.wrangler.api.DirectiveExecutionException;
+import io.cdap.wrangler.api.DirectiveParseException;
+import io.cdap.wrangler.api.ExecutorContext;
+import io.cdap.wrangler.api.Row;
+import io.cdap.wrangler.api.annotations.Categories;
+import io.cdap.wrangler.api.parser.ByteSize;
+import io.cdap.wrangler.api.parser.ColumnName;
+import io.cdap.wrangler.api.parser.Text;
+import io.cdap.wrangler.api.parser.TimeDuration;
+import io.cdap.wrangler.api.parser.TokenType;
+import io.cdap.wrangler.api.parser.UsageDefinition;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Directive that sums a byte-size column and a time-duration column over the rows
+ * it is given and emits a single row holding the totals, expressed in megabytes
+ * and seconds.
+ *
+ * <p>The totals live in instance fields, so they keep growing across successive
+ * {@link #execute} calls until {@link #destroy()} resets them.</p>
+ */
+@Plugin(type = Directive.TYPE)
+@Name(AggregateStats.NAME)
+@Categories(categories = { "data-aggregation"})
+public class AggregateStats implements Directive {
+  public static final String NAME = "aggregate-stats";
+
+  /** Divisor converting the accumulated byte total to megabytes. */
+  private static final double BYTES_PER_MB = 1024.0 * 1024.0;
+  /** Divisor converting the accumulated millisecond total to seconds. */
+  private static final double MILLIS_PER_SECOND = 1000.0;
+
+  private String byteCol;
+  private String timeCol;
+  private String outputSizeCol;
+  private String outputTimeCol;
+  private double totalBytes = 0.0;
+  private double totalTimeMs = 0.0;
+  private int count = 0;
+
+  /**
+   * Defines the usage of the directive: two source columns followed by the two
+   * output column names.
+   *
+   * @return UsageDefinition object containing the directive's argument definitions.
+   */
+  @Override
+  public UsageDefinition define() {
+    UsageDefinition.Builder builder = UsageDefinition.builder(NAME);
+    builder.define("byteCol", TokenType.COLUMN_NAME);
+    builder.define("timeCol", TokenType.COLUMN_NAME);
+    builder.define("outputSizeCol", TokenType.TEXT);
+    builder.define("outputTimeCol", TokenType.TEXT);
+    return builder.build();
+  }
+
+  /**
+   * Initializes the directive with the provided arguments.
+   *
+   * @param args Arguments object containing the directive's parameters.
+   * @throws DirectiveParseException if there is an error parsing the arguments.
+   */
+  @Override
+  public void initialize(Arguments args) throws DirectiveParseException {
+    byteCol = ((ColumnName) args.value("byteCol")).value();
+    timeCol = ((ColumnName) args.value("timeCol")).value();
+    outputSizeCol = ((Text) args.value("outputSizeCol")).value();
+    outputTimeCol = ((Text) args.value("outputTimeCol")).value();
+  }
+
+  /**
+   * Adds the byte-size and duration values of every row that has both source
+   * columns to the running totals and returns the totals as a single row.
+   *
+   * <p>Rows lacking either column are skipped. If any value cannot be parsed the
+   * whole batch is rejected and the running totals are left untouched.</p>
+   *
+   * @param rows List of Row objects to process.
+   * @param ctx ExecutorContext providing execution context information (not used).
+   * @return an empty list when no row has contributed so far; otherwise a list with
+   *         one row holding the total size in MB and the total time in seconds.
+   * @throws DirectiveExecutionException if a value is null or cannot be parsed; the
+   *         original failure is attached as the cause.
+   */
+  @Override
+  public List<Row> execute(List<Row> rows, ExecutorContext ctx) throws DirectiveExecutionException {
+    // Accumulate into locals first so a row that fails to parse cannot leave the
+    // running totals half-updated.
+    double batchBytes = 0.0;
+    double batchTimeMs = 0.0;
+    int batchCount = 0;
+
+    try {
+      for (Row row : rows) {
+        if (row.find(byteCol) == -1 || row.find(timeCol) == -1) {
+          continue;
+        }
+        Object byteVal = row.getValue(byteCol);
+        Object timeVal = row.getValue(timeCol);
+        if (byteVal == null || timeVal == null) {
+          throw new IllegalArgumentException(
+            String.format("null value in column '%s' or '%s'", byteCol, timeCol));
+        }
+
+        // ByteSize and TimeDuration do the unit parsing and normalisation.
+        batchBytes += new ByteSize(byteVal.toString()).getBytes();
+        batchTimeMs += new TimeDuration(timeVal.toString()).getMilliseconds();
+        batchCount++;
+      }
+    } catch (Exception e) {
+      // Keep the original failure as the cause so the stack trace is not lost.
+      throw new DirectiveExecutionException(
+        String.format("Error aggregating stats: %s", e.getMessage()), e);
+    }
+
+    totalBytes += batchBytes;
+    totalTimeMs += batchTimeMs;
+    count += batchCount;
+
+    if (count == 0) {
+      return new ArrayList<>();
+    }
+
+    Row result = new Row();
+    result.add(outputSizeCol, totalBytes / BYTES_PER_MB);
+    result.add(outputTimeCol, totalTimeMs / MILLIS_PER_SECOND);
+
+    List<Row> results = new ArrayList<>();
+    results.add(result);
+    return results;
+  }
+
+  /**
+   * Provides the output schema for the directive: two double fields named after the
+   * configured output columns.
+   *
+   * @param inputSchema Schema object representing the input schema (not used).
+   * @return Schema object representing the output schema.
+   */
+  public Schema getOutputSchema(Schema inputSchema) {
+    List<Schema.Field> fields = new ArrayList<>();
+    fields.add(Schema.Field.of(outputSizeCol, Schema.of(Schema.Type.DOUBLE)));
+    fields.add(Schema.Field.of(outputTimeCol, Schema.of(Schema.Type.DOUBLE)));
+    return Schema.recordOf("aggregate-stats", fields);
+  }
+
+  /**
+   * Resets the accumulated totals and clears the configured column names.
+   */
+  @Override
+  public void destroy() {
+    // Reset all accumulated values
+    totalBytes = 0.0;
+    totalTimeMs = 0.0;
+    count = 0;
+
+    // Clear column references
+    byteCol = null;
+    timeCol = null;
+    outputSizeCol = null;
+    outputTimeCol = null;
+  }
+}
diff --git a/wrangler-core/src/test/java/io/cdap/directives/aggregates/AggregateStatsTest.java b/wrangler-core/src/test/java/io/cdap/directives/aggregates/AggregateStatsTest.java
new file mode 100644
index 000000000..ade25b0ee
--- /dev/null
+++ b/wrangler-core/src/test/java/io/cdap/directives/aggregates/AggregateStatsTest.java
@@ -0,0 +1,206 @@
+/*
+* Copyright © 2017-2019 Cask Data, Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License. You may obtain a copy of
+* the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+* License for the specific language governing permissions and limitations under
+* the License.
+*/
+
+package io.cdap.directives.aggregates;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.gson.JsonElement;
+
+import io.cdap.wrangler.api.Arguments;
+import io.cdap.wrangler.api.DirectiveExecutionException;
+import io.cdap.wrangler.api.Row;
+import io.cdap.wrangler.api.parser.ColumnName;
+import io.cdap.wrangler.api.parser.Text;
+import io.cdap.wrangler.api.parser.Token;
+import io.cdap.wrangler.api.parser.TokenType;
+
+/**
+ * Tests for {@link AggregateStats}, driven through a hand-written {@link Arguments} stub.
+ */
+public class AggregateStatsTest {
+  /** Tolerance used when comparing floating point totals. */
+  private static final double DELTA = 0.001;
+
+  @Test
+  public void testAggregateStatsDirective() throws Exception {
+    List<Row> rows = new ArrayList<>();
+    rows.add(new Row().add("data_transfer_size", "10KB").add("response_time", "2s"));
+    rows.add(new Row().add("data_transfer_size", "5KB").add("response_time", "500ms"));
+
+    List<Row> result = runDirective(rows);
+
+    Assert.assertEquals(1, result.size());
+    Row output = result.get(0);
+
+    // Size: 10KB + 5KB = 15KB = 15 * 1024 bytes = 15360 bytes
+    // MB = bytes / (1024 * 1024)
+    double expectedMB = 15360.0 / (1024 * 1024);
+    double actualMB = (Double) output.getValue("total_size_mb");
+
+    // Time: 2s + 500ms = 2500ms = 2.5s
+    double expectedSeconds = 2.5;
+    double actualSeconds = (Double) output.getValue("total_time_sec");
+
+    Assert.assertEquals(expectedMB, actualMB, DELTA);
+    Assert.assertEquals(expectedSeconds, actualSeconds, DELTA);
+  }
+
+  @Test
+  public void testEmptyInputRows() throws Exception {
+    List<Row> result = runDirective(new ArrayList<>());
+
+    Assert.assertEquals(0, result.size());
+  }
+
+  @Test
+  public void testInvalidData() throws Exception {
+    List<Row> rows = new ArrayList<>();
+    rows.add(new Row().add("data_transfer_size", "invalidKB").add("response_time", "invalidTime"));
+
+    try {
+      runDirective(rows);
+      Assert.fail("Expected an exception for invalid data");
+    } catch (DirectiveExecutionException e) {
+      Assert.assertTrue(e.getMessage().contains("Error aggregating stats"));
+    }
+  }
+
+  @Test
+  public void testLargeData() throws Exception {
+    List<Row> rows = new ArrayList<>();
+    rows.add(new Row().add("data_transfer_size", "1024MB").add("response_time", "1h"));
+    // "min" is the minute unit the grammar's TIME_UNIT accepts; a bare "m" is not.
+    rows.add(new Row().add("data_transfer_size", "512MB").add("response_time", "30min"));
+
+    List<Row> result = runDirective(rows);
+
+    Assert.assertEquals(1, result.size());
+    Row output = result.get(0);
+
+    double expectedMB = 1536.0; // 1024MB + 512MB
+    double actualMB = (Double) output.getValue("total_size_mb");
+
+    double expectedSeconds = 5400.0; // 1h + 30min = 5400 seconds
+    double actualSeconds = (Double) output.getValue("total_time_sec");
+
+    Assert.assertEquals(expectedMB, actualMB, DELTA);
+    Assert.assertEquals(expectedSeconds, actualSeconds, DELTA);
+  }
+
+  @Test
+  public void testEdgeCases() throws Exception {
+    List<Row> rows = new ArrayList<>();
+    rows.add(new Row().add("data_transfer_size", "0KB").add("response_time", "0s"));
+
+    List<Row> result = runDirective(rows);
+
+    Assert.assertEquals(1, result.size());
+    Row output = result.get(0);
+
+    double expectedMB = 0.0;
+    double actualMB = (Double) output.getValue("total_size_mb");
+
+    double expectedSeconds = 0.0;
+    double actualSeconds = (Double) output.getValue("total_time_sec");
+
+    Assert.assertEquals(expectedMB, actualMB, DELTA);
+    Assert.assertEquals(expectedSeconds, actualSeconds, DELTA);
+  }
+
+  /**
+   * Creates a fresh directive, initializes it with the stub arguments and runs it
+   * once over {@code rows} with a null execution context.
+   */
+  private List<Row> runDirective(List<Row> rows) throws Exception {
+    AggregateStats directive = new AggregateStats();
+    directive.initialize(createMockArguments());
+    return directive.execute(rows, null);
+  }
+
+  /**
+   * Builds an {@link Arguments} stub that returns the fixed column names used by
+   * these tests.
+   */
+  private Arguments createMockArguments() {
+    return new Arguments() {
+      @SuppressWarnings("unchecked")
+      @Override
+      public <T extends Token> T value(String name) {
+        switch (name) {
+          case "byteCol":
+            return (T) new ColumnName("data_transfer_size");
+          case "timeCol":
+            return (T) new ColumnName("response_time");
+          case "outputSizeCol":
+            return (T) new Text("total_size_mb");
+          case "outputTimeCol":
+            return (T) new Text("total_time_sec");
+          default:
+            return null;
+        }
+      }
+
+      @Override
+      public int size() {
+        return 4;
+      }
+
+      @Override
+      public boolean contains(String name) {
+        return true;
+      }
+
+      @Override
+      public TokenType type(String name) {
+        return null;
+      }
+
+      @Override
+      public int line() {
+        return 1;
+      }
+
+      @Override
+      public int column() {
+        return 0;
+      }
+
+      @Override
+      public String source() {
+        return "aggregate-stats :data_transfer_size :response_time total_size_mb total_time_sec";
+      }
+
+      @Override
+      public JsonElement toJson() {
+        return null;
+      }
+    };
+  }
+}
diff --git a/wrangler-core/src/test/java/io/cdap/wrangler/parser/RecipeVisitor.java b/wrangler-core/src/test/java/io/cdap/wrangler/parser/RecipeVisitor.java
new file mode 100644
index 000000000..ed49ddf63
--- /dev/null
+++ b/wrangler-core/src/test/java/io/cdap/wrangler/parser/RecipeVisitor.java
@@ -0,0 +1,338 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + + package io.cdap.wrangler.parser; + + import io.cdap.wrangler.api.LazyNumber; + import io.cdap.wrangler.api.RecipeSymbol; + import io.cdap.wrangler.api.SourceInfo; + import io.cdap.wrangler.api.Triplet; + import io.cdap.wrangler.api.parser.*; + import org.antlr.v4.runtime.ParserRuleContext; + import org.antlr.v4.runtime.misc.Interval; + import org.antlr.v4.runtime.tree.ParseTree; + import org.antlr.v4.runtime.tree.TerminalNode; + + import java.util.ArrayList; + import java.util.HashMap; + import java.util.List; + import java.util.Map; + + /** + * This class RecipeVisitor implements the visitor pattern + * used during traversal of the AST tree. The ParserTree#Walker + * invokes appropriate methods as call backs with information about the node. + * + *

+ * <p>In order to understand what's being invoked, please look at the grammar file
+ * <tt>Directives.g4</tt>.</p>
+ *
+ * <p>This class exposes a <code>getCompiledUnit</code> method for retrieving the
+ * <code>RecipeSymbol</code> after visiting. The <code>RecipeSymbol</code> represents
+ * all the <code>TokenGroup</code>s for all directives in a recipe. Each directive
+ * will create a <code>TokenGroup</code>.</p>
+ *
+ * <p>As the <code>ParseTree</code> is walked through the call graph, it generates
+ * one <code>TokenGroup</code> for each directive in the recipe. Each <code>TokenGroup</code>
+ * contains parsed <code>Token</code>s for that directive along with more information like
+ * <code>SourceInfo</code>. A collection of <code>TokenGroup</code>s constitutes a <code>RecipeSymbol</code>
+ * that is returned by this function.</p>

+ */
+public final class RecipeVisitor extends DirectivesBaseVisitor<RecipeSymbol.Builder> {
+  private final RecipeSymbol.Builder builder = new RecipeSymbol.Builder();
+
+  /**
+   * Returns a {@code RecipeSymbol} for the recipe being parsed. This
+   * object has all the tokens that were successfully parsed along with source
+   * information for each directive in the recipe.
+   *
+   * @return A compiled object after parsing the recipe.
+   */
+  public RecipeSymbol getCompiledUnit() {
+    return builder.build();
+  }
+
+  /**
+   * A Recipe is made up of Directives and Directives is made up of each individual
+   * Directive. This method is invoked on every visit to a new directive in the recipe.
+   */
+  @Override
+  public RecipeSymbol.Builder visitDirective(DirectivesParser.DirectiveContext ctx) {
+    builder.createTokenGroup(getOriginalSource(ctx));
+    return super.visitDirective(ctx);
+  }
+
+  /**
+   * A Directive can include identifiers, this method extracts the token that is being
+   * identified as token of type {@code Identifier}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitIdentifier(DirectivesParser.IdentifierContext ctx) {
+    builder.addToken(new Identifier(ctx.Identifier().getText()));
+    return super.visitIdentifier(ctx);
+  }
+
+  /**
+   * A Directive can include properties (which are a collection of key and value pairs),
+   * this method extracts the token that is being identified as token of type {@code Properties}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitPropertyList(DirectivesParser.PropertyListContext ctx) {
+    Map<String, Token> props = new HashMap<>();
+    List<DirectivesParser.PropertyContext> properties = ctx.property();
+    for (DirectivesParser.PropertyContext property : properties) {
+      String identifier = property.Identifier().getText();
+      Token token;
+      if (property.number() != null) {
+        token = new Numeric(new LazyNumber(property.number().getText()));
+      } else if (property.bool() != null) {
+        token = new Bool(Boolean.valueOf(property.bool().getText()));
+      } else {
+        // Text values keep their surrounding quotes in the parse tree; strip them.
+        String text = property.text().getText();
+        token = new Text(text.substring(1, text.length() - 1));
+      }
+      props.put(identifier, token);
+    }
+    builder.addToken(new Properties(props));
+    return builder;
+  }
+
+  /**
+   * A Pragma is an instruction to the compiler to dynamically load the directives being specified
+   * from the {@code DirectiveRegistry}. These do not affect the data flow.
+   *
+   * <p>E.g. {@code #pragma load-directives test1, test2, test3;} will collect the tokens
+   * test1, test2 and test3 as dynamically loadable directives.</p>
+   */
+  @Override
+  public RecipeSymbol.Builder visitPragmaLoadDirective(DirectivesParser.PragmaLoadDirectiveContext ctx) {
+    List<TerminalNode> identifiers = ctx.identifierList().Identifier();
+    for (TerminalNode identifier : identifiers) {
+      builder.addLoadableDirective(identifier.getText());
+    }
+    return builder;
+  }
+
+  /**
+   * A Pragma version is an informational directive to notify the compiler about the grammar that
+   * it should be using to parse the directives below.
+   */
+  @Override
+  public RecipeSymbol.Builder visitPragmaVersion(DirectivesParser.PragmaVersionContext ctx) {
+    builder.addVersion(ctx.Number().getText());
+    return builder;
+  }
+
+  /**
+   * A Directive can include number ranges like start:end=value[,start:end=value]*. This
+   * visitor method allows you to collect all the number ranges and create a token type
+   * {@code Ranges}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitNumberRanges(DirectivesParser.NumberRangesContext ctx) {
+    List<Triplet<Numeric, Numeric, String>> output = new ArrayList<>();
+    List<DirectivesParser.NumberRangeContext> ranges = ctx.numberRange();
+    for (DirectivesParser.NumberRangeContext range : ranges) {
+      List<TerminalNode> numbers = range.Number();
+      String text = range.value().getText();
+      // Only single-quoted values are unquoted here; everything else is kept verbatim.
+      if (text.startsWith("'") && text.endsWith("'")) {
+        text = text.substring(1, text.length() - 1);
+      }
+      Triplet<Numeric, Numeric, String> val =
+        new Triplet<>(new Numeric(new LazyNumber(numbers.get(0).getText())),
+                      new Numeric(new LazyNumber(numbers.get(1).getText())),
+                      text
+        );
+      output.add(val);
+    }
+    builder.addToken(new Ranges(output));
+    return builder;
+  }
+
+  /**
+   * This visitor method extracts the custom directive name specified. The custom
+   * directives are specified with a bang (!) at the start.
+   */
+  @Override
+  public RecipeSymbol.Builder visitEcommand(DirectivesParser.EcommandContext ctx) {
+    builder.addToken(new DirectiveName(ctx.Identifier().getText()));
+    return builder;
+  }
+
+  /**
+   * A Directive can consist of column specifiers. These are columns that the directive
+   * would operate on. When a token of type column is visited, it would generate a token
+   * of type {@code ColumnName}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitColumn(DirectivesParser.ColumnContext ctx) {
+    // Drop the leading marker character of the column token.
+    builder.addToken(new ColumnName(ctx.Column().getText().substring(1)));
+    return builder;
+  }
+
+  /**
+   * A Directive can consist of text field. These type of fields are enclosed within
+   * a single-quote or a double-quote. This visitor method extracts the string value
+   * within the quotes and creates a token type {@code Text}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitText(DirectivesParser.TextContext ctx) {
+    String value = ctx.String().getText();
+    builder.addToken(new Text(value.substring(1, value.length() - 1)));
+    return builder;
+  }
+
+  /**
+   * A Directive can consist of numeric field. This visitor method extracts the
+   * numeric value into a token type {@code Numeric}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitNumber(DirectivesParser.NumberContext ctx) {
+    LazyNumber number = new LazyNumber(ctx.Number().getText());
+    builder.addToken(new Numeric(number));
+    return builder;
+  }
+
+  /**
+   * A Directive can consist of Bool field. The Bool field is represented as
+   * either true or false. This visitor method extracts the bool value into a
+   * token type {@code Bool}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitBool(DirectivesParser.BoolContext ctx) {
+    builder.addToken(new Bool(Boolean.valueOf(ctx.Bool().getText())));
+    return builder;
+  }
+
+  /**
+   * A Directive can include an expression or a condition to be evaluated. When
+   * such a token type is found, the visitor extracts the expression and generates
+   * a token type {@code Expression} to be added to the {@code TokenGroup}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitCondition(DirectivesParser.ConditionContext ctx) {
+    int childCount = ctx.getChildCount();
+    StringBuilder sb = new StringBuilder();
+    // Skip the first and last child and join the rest with a trailing space each.
+    for (int i = 1; i < childCount - 1; ++i) {
+      ParseTree child = ctx.getChild(i);
+      sb.append(child.getText()).append(" ");
+    }
+    builder.addToken(new Expression(sb.toString()));
+    return builder;
+  }
+
+  /**
+   * A Directive has a name and in the parsing context it's called a command.
+   * This visitor method extracts the command and creates a token type {@code DirectiveName}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitCommand(DirectivesParser.CommandContext ctx) {
+    builder.addToken(new DirectiveName(ctx.Identifier().getText()));
+    return builder;
+  }
+
+  /**
+   * This visitor method extracts the list of columns specified. It creates a token
+   * type {@code ColumnNameList} to be added to the {@code TokenGroup}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitColList(DirectivesParser.ColListContext ctx) {
+    List<TerminalNode> columns = ctx.Column();
+    List<String> names = new ArrayList<>();
+    for (TerminalNode column : columns) {
+      names.add(column.getText().substring(1));
+    }
+    builder.addToken(new ColumnNameList(names));
+    return builder;
+  }
+
+  /**
+   * This visitor method extracts the list of numerics specified. It creates a token
+   * type {@code NumericList} to be added to the {@code TokenGroup}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitNumberList(DirectivesParser.NumberListContext ctx) {
+    List<TerminalNode> numbers = ctx.Number();
+    List<LazyNumber> numerics = new ArrayList<>();
+    for (TerminalNode number : numbers) {
+      numerics.add(new LazyNumber(number.getText()));
+    }
+    builder.addToken(new NumericList(numerics));
+    return builder;
+  }
+
+  /**
+   * This visitor method extracts the list of booleans specified. It creates a token
+   * type {@code BoolList} to be added to the {@code TokenGroup}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitBoolList(DirectivesParser.BoolListContext ctx) {
+    List<TerminalNode> bools = ctx.Bool();
+    List<Boolean> booleans = new ArrayList<>();
+    for (TerminalNode bool : bools) {
+      booleans.add(Boolean.parseBoolean(bool.getText()));
+    }
+    builder.addToken(new BoolList(booleans));
+    return builder;
+  }
+
+  /**
+   * This visitor method extracts the list of strings specified. It creates a token
+   * type {@code TextList} to be added to the {@code TokenGroup}.
+   */
+  @Override
+  public RecipeSymbol.Builder visitStringList(DirectivesParser.StringListContext ctx) {
+    List<TerminalNode> strings = ctx.String();
+    List<String> strs = new ArrayList<>();
+    for (TerminalNode string : strings) {
+      String text = string.getText();
+      strs.add(text.substring(1, text.length() - 1));
+    }
+    builder.addToken(new TextList(strs));
+    return builder;
+  }
+
+  /**
+   * Extracts the original recipe text covered by {@code ctx}, together with the
+   * line and column of its first token.
+   */
+  private SourceInfo getOriginalSource(ParserRuleContext ctx) {
+    int a = ctx.getStart().getStartIndex();
+    int b = ctx.getStop().getStopIndex();
+    Interval interval = new Interval(a, b);
+    String text = ctx.start.getInputStream().getText(interval);
+    int lineno = ctx.getStart().getLine();
+    int column = ctx.getStart().getCharPositionInLine();
+    return new SourceInfo(lineno, column, text);
+  }
+
+  /**
+   * Converts a {@code value} node into the matching token: {@code Numeric},
+   * {@code ColumnName}, {@code Bool}, {@code ByteSize}, {@code TimeDuration}, or
+   * {@code Text} for a quoted string.
+   *
+   * <p>NOTE(review): {@code visitNumberRanges} reads {@code range.value()} directly and
+   * returns without visiting its children, so this method is not reached through number
+   * ranges; confirm which rule is expected to route here.</p>
+   */
+  @Override
+  public RecipeSymbol.Builder visitValue(DirectivesParser.ValueContext ctx) {
+    Token token;
+    if (ctx.Number() != null) {
+      token = new Numeric(new LazyNumber(ctx.Number().getText()));
+    } else if (ctx.Column() != null) {
+      token = new ColumnName(ctx.Column().getText().substring(1));
+    } else if (ctx.Bool() != null) {
+      token = new Bool(Boolean.valueOf(ctx.Bool().getText()));
+    } else if (ctx.BYTE_SIZE() != null) {
+      token = new ByteSize(ctx.BYTE_SIZE().getText());
+    } else if (ctx.TIME_DURATION() != null) {
+      token = new TimeDuration(ctx.TIME_DURATION().getText());
+    } else {
+      // Remaining alternative of the rule is a quoted String; strip the quotes.
+      String text = ctx.String().getText();
+      token = new Text(text.substring(1, text.length() - 1));
+    }
+
+    builder.addToken(token);
+    return builder;
+  }
+}