Skip to content

Add fingerprinting to the client #77

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@

package org.apache.rocketmq.streams.client.source;

import java.util.Properties;
import javax.sql.DataSource;
import org.apache.rocketmq.streams.client.transform.DataStream;
import org.apache.rocketmq.streams.common.channel.impl.file.FileSource;
import org.apache.rocketmq.streams.common.channel.source.ISource;
import org.apache.rocketmq.streams.common.component.ComponentCreator;
import org.apache.rocketmq.streams.common.topology.builder.PipelineBuilder;
import org.apache.rocketmq.streams.source.RocketMQSource;

Expand All @@ -32,10 +35,23 @@ public DataStreamSource(String namespace, String pipelineName) {
this.mainPipelineBuilder = new PipelineBuilder(namespace, pipelineName);
}

/**
 * Creates a source backed by a new {@code PipelineBuilder} and registers two
 * pipeline-scoped properties derived from the duplicate-handling arguments.
 *
 * <p>Properties written (both prefixed with {@code pipelineName}):
 * <ul>
 *   <li>{@code .duplicate.fields.names} - the entries of {@code duplicateKeys} joined with ';'</li>
 *   <li>{@code .duplicate.expiration.time} - the string form of {@code windowSize}</li>
 * </ul>
 * The resulting {@code Properties} object is passed to {@code ComponentCreator.createProperties}.
 *
 * <p>A {@code null} {@code windowSize} is stored as the literal text "null", and a
 * {@code null} {@code duplicateKeys} makes {@code String.join} throw a
 * {@code NullPointerException}.
 *
 * <p>NOTE(review): whether {@code createProperties} merges with or replaces properties that
 * were registered earlier is not visible here - confirm before relying on other settings.
 *
 * @param namespace     namespace handed to the pipeline builder
 * @param pipelineName  pipeline name; also used as the prefix of the property keys
 * @param duplicateKeys field names written to the {@code duplicate.fields.names} property
 * @param windowSize    value written to the {@code duplicate.expiration.time} property
 *                      (unit is not visible here - TODO confirm)
 */
public DataStreamSource(String namespace, String pipelineName, String[] duplicateKeys, Long windowSize) {
    this.mainPipelineBuilder = new PipelineBuilder(namespace, pipelineName);

    final String fieldNamesKey = pipelineName + ".duplicate.fields.names";
    final String expirationKey = pipelineName + ".duplicate.expiration.time";

    Properties duplicateSettings = new Properties();
    String joinedFieldNames = String.join(";", duplicateKeys);
    duplicateSettings.setProperty(fieldNamesKey, joinedFieldNames);
    String expirationText = String.valueOf(windowSize);
    duplicateSettings.setProperty(expirationKey, expirationText);

    ComponentCreator.createProperties(duplicateSettings);
}

/**
 * Static factory equivalent to calling the two-argument constructor.
 *
 * @param namespace    namespace of the pipeline
 * @param pipelineName name of the pipeline
 * @return a newly constructed {@code DataStreamSource}
 */
public static DataStreamSource create(String namespace, String pipelineName) {
    DataStreamSource source = new DataStreamSource(namespace, pipelineName);
    return source;
}

/**
 * Static factory equivalent to calling the four-argument constructor.
 *
 * <p>{@code expirationTime} is forwarded unchanged as the constructor's {@code windowSize}
 * argument; the two names refer to the same value.
 *
 * @param namespace      namespace of the pipeline
 * @param pipelineName   name of the pipeline
 * @param duplicateKeys  field names forwarded to the constructor
 * @param expirationTime value forwarded to the constructor as {@code windowSize}
 * @return a newly constructed {@code DataStreamSource}
 */
public static DataStreamSource create(String namespace, String pipelineName, String[] duplicateKeys,
    Long expirationTime) {
    DataStreamSource source = new DataStreamSource(namespace, pipelineName, duplicateKeys, expirationTime);
    return source;
}

/**
 * Convenience overload that delegates to the two-argument {@code fromFile} with the
 * boolean argument set to {@code true}.
 *
 * <p>NOTE(review): the meaning of that boolean is defined by the two-argument overload,
 * which is not visible here.
 *
 * @param filePath path passed through to the two-argument overload
 * @return whatever the two-argument overload returns
 */
public DataStream fromFile(String filePath) {
    final boolean defaultFlag = true;
    return fromFile(filePath, defaultFlag);
}
Expand Down Expand Up @@ -68,7 +84,7 @@ public DataStream fromRocketmq(String topic, String groupName, String tags, bool

public DataStream from(ISource<?> source) {
this.mainPipelineBuilder.setSource(source);
return new DataStream(this.mainPipelineBuilder,null);
return new DataStream(this.mainPipelineBuilder, null);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ public static Strategy highPerformance() {
return new WindowStrategy();
}

public static Strategy windowDefaultSiZe(int defualtSize){
ComponentCreator.getProperties().put(ConfigureFileKey.DIPPER_WINDOW_DEFAULT_INERVAL_SIZE,defualtSize);
/**
 * Stores {@code defualtSize} in the properties returned by
 * {@code ComponentCreator.getProperties()} under
 * {@code ConfigureFileKey.DIPPER_WINDOW_DEFAULT_INERVAL_SIZE}.
 *
 * <p>NOTE(review): the value is stored as a boxed {@code Integer} through {@code put}, not as
 * a {@code String} through {@code setProperty}. If the target is a {@code java.util.Properties},
 * {@code getProperty} returns {@code null} for non-String values - confirm how readers
 * fetch this key.
 *
 * <p>NOTE(review): this method always returns {@code null} even though its return type is
 * {@code Strategy}; callers that pass the result on as a strategy receive {@code null}.
 *
 * @param defualtSize value to store under the window default interval size key
 * @return always {@code null}
 */
public static Strategy windowDefaultSiZe(int defualtSize) {
    final Integer boxedSize = Integer.valueOf(defualtSize);
    ComponentCreator.getProperties().put(ConfigureFileKey.DIPPER_WINDOW_DEFAULT_INERVAL_SIZE, boxedSize);
    return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ protected <T> T operate(IMessage message, AbstractContext context) {
splitMessages.add(subMessage);
}
context.openSplitModel();
;
context.setSplitMessages(splitMessages);
return null;
} catch (Exception e) {
Expand All @@ -158,13 +157,17 @@ protected <T> T operate(IMessage message, AbstractContext context) {
}

public <O> DataStream filter(final FilterFunction<O> filterFunction) {
StageBuilder mapUDFOperator = new StageBuilder() {
return filter(filterFunction, new String[] {});
}

public <O> DataStream filter(final FilterFunction<O> filterFunction, String... fingerprints) {
StageBuilder mapUDFOperator = new StageBuilder() {
@Override
protected <T> T operate(IMessage message, AbstractContext context) {
try {
boolean isFilter = filterFunction.filter((O) message.getMessageValue());
if (isFilter) {
boolean tag = filterFunction.filter((O) message.getMessageValue());
if (!tag) {
context.put("NEED_USE_FINGER_PRINT", true);
context.breakExecute();
}
} catch (Exception e) {
Expand All @@ -175,6 +178,24 @@ protected <T> T operate(IMessage message, AbstractContext context) {
};
ChainStage stage = this.mainPipelineBuilder.createStage(mapUDFOperator);
this.mainPipelineBuilder.setTopologyStages(currentChainStage, stage);

if (fingerprints.length > 0) {
ChainPipeline<?> pipeline = this.mainPipelineBuilder.getPipeline();
String filterName = stage.getLabel();
if (!pipeline.isTopology()) {
List<?> stages = pipeline.getStages();
int i = 0;
for (Object st : stages) {
if (st == stage) {
break;
}
i++;
}
filterName = i + "";
}
String key = MapKeyUtil.createKeyBySign(".", pipeline.getNameSpace(), pipeline.getConfigureName(), filterName);
ComponentCreator.getProperties().setProperty(key, String.join(",", fingerprints));
}
return new DataStream(this.mainPipelineBuilder, this.otherPipelineBuilders, stage);
}

Expand Down Expand Up @@ -463,7 +484,8 @@ public DataStreamAction toRocketmq(String topic, String tags, String groupName,
return toRocketmq(topic, tags, groupName, -1, nameServerAddress, clusterName, order);
}

public DataStreamAction toRocketmq(String topic, String tags, String groupName, int batchSize, String nameServerAddress,
public DataStreamAction toRocketmq(String topic, String tags, String groupName, int batchSize,
String nameServerAddress,
String clusterName, boolean order) {
RocketMQSink rocketMQSink = new RocketMQSink();
if (StringUtils.isNotBlank(topic)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,74 +46,86 @@ public void init() {
@Test
public void testFromFile() {
dataStream
.fromFile("/Users/junjie.cheng/text.txt", false)
.map(message -> message + "--")
.toPrint(1)
.start();
.fromFile("/Users/junjie.cheng/text.txt", false)
.map(message -> message + "--")
.toPrint(1)
.start();
}

@Test
public void testRocketmq() {
DataStreamSource dataStream = StreamBuilder.dataStream("test_namespace", "graph_pipeline");
dataStream
.fromRocketmq("topic_xxxx01", "consumer_xxxx01", "127.0.0.1:9876")
.map(message -> message + "--")
.toPrint(1)
.start();
.fromRocketmq("topic_xxxx01", "consumer_xxxx01", "127.0.0.1:9876")
.map(message -> message + "--")
.toPrint(1)
.start();
}

@Test
public void testDBCheckPoint() {
dataStream
.fromRocketmq("topic_xxxx02", "consumer_xxxx02", "127.0.0.1:9876")
.map(message -> message + "--")
.toPrint(1)
.with(WindowStrategy.exactlyOnce("", "", ""))
.start();
.fromRocketmq("topic_xxxx02", "consumer_xxxx02", "127.0.0.1:9876")
.map(message -> message + "--")
.toPrint(1)
.with(WindowStrategy.exactlyOnce("", "", ""))
.start();
}

@Test
public void testFileCheckPoint() {
dataStream
.fromFile("/Users/junjie.cheng/text.txt", false)
.map(message -> message + "--")
.toPrint(1)
.with(WindowStrategy.highPerformance())
.start();
.fromFile("/Users/junjie.cheng/text.txt", false)
.map(message -> message + "--")
.toPrint(1)
.with(WindowStrategy.highPerformance())
.start();
}


@Test
public void testWindow() {
DataStreamSource dataStream = StreamBuilder.dataStream("test_namespace", "graph_pipeline");
dataStream
.fromRocketmq("topic_xxxx03", "consumer_xxxx03", "127.0.0.1:9876")
.map(new MapFunction<JSONObject, String>() {

@Override
public JSONObject map(String message) throws Exception {
JSONObject msg = JSONObject.parseObject(message);
return msg;
}
})
.window(TumblingWindow.of(Time.seconds(5)))
.groupBy("name", "age")
.count("c")
.sum("score", "scoreValue")
.toDataSteam()
.toPrint(1)
.with(WindowStrategy.exactlyOnce("", "", ""))
.start();
.fromRocketmq("topic_xxxx03", "consumer_xxxx03", "127.0.0.1:9876")
.map(new MapFunction<JSONObject, String>() {

@Override
public JSONObject map(String message) throws Exception {
JSONObject msg = JSONObject.parseObject(message);
return msg;
}
})
.window(TumblingWindow.of(Time.seconds(5)))
.groupBy("name", "age")
.count("c")
.sum("score", "scoreValue")
.toDataSteam()
.toPrint(1)
.with(WindowStrategy.exactlyOnce("", "", ""))
.start();
}

/**
 * Builds and starts a file -> map -> print pipeline on {@code dataStream}
 * (presumably the fixture prepared in {@code init()}).
 *
 * NOTE(review): despite its name, this test passes no fingerprint fields and no
 * strategy; the chain is the same one used in {@code testFromFile}, and nothing is asserted.
 * NOTE(review): the input is a developer-local absolute path, so the test depends on
 * that file existing on the machine running it.
 */
@Test
public void testFingerPrintStrategy() {
dataStream
.fromFile("/Users/junjie.cheng/text.txt", false)
// append a suffix to every message value
.map(message -> message + "--")
.toPrint(1)
.start();

}

@Test
public void testBothStrategy() {
dataStream
.fromRocketmq("topic_xxxx04", "consumer_xxxx04", "127.0.0.1:9876")
.map(message -> message + "--")
.toPrint(1)
.with()
.start();
.fromRocketmq("topic_xxxx04", "consumer_xxxx04", "127.0.0.1:9876")
.map(message -> message + "--")
.filter(message -> {
return true;
})
.toPrint(1)
.with()
.start();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public MessageHeader copy() {
header.msgRouteFromLable = msgRouteFromLable;
header.logFingerprintValue = logFingerprintValue;
header.messageQueue = messageQueue;
header.checkpointQueueIds=checkpointQueueIds;
header.checkpointQueueIds = checkpointQueueIds;
return header;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@
/**
 * User-supplied predicate evaluated against a message value by a filter stage.
 *
 * @param <T> type of the value handed to {@link #filter(Object)}
 */
public interface FilterFunction<T> extends Function {

/**
 * Evaluates the predicate for one value.
 *
 * As invoked by the added lines of {@code DataStream.filter} in this change, a
 * {@code false} result stops further processing of the message
 * ({@code context.breakExecute()}), while {@code true} lets it continue.
 * NOTE(review): the removed lines of the same change broke execution on {@code true}
 * instead - confirm the polarity against the current {@code DataStream}.
 *
 * @param value the message value to test
 * @return the predicate result
 * @throws Exception if the implementation fails while evaluating the value
 */
boolean filter(T value) throws Exception;

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.rocketmq.streams.common.optimization;

import com.gliwka.hyperscan.wrapper.CompileErrorException;
import com.gliwka.hyperscan.wrapper.Database;
import com.gliwka.hyperscan.wrapper.Expression;
import com.gliwka.hyperscan.wrapper.ExpressionFlag;
Expand All @@ -27,23 +28,29 @@
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.rocketmq.streams.common.utils.StringUtil;

public class HyperscanRegex<T> {
protected List<Expression> regexs = new ArrayList<>();
protected List<Expression> allRegexes = new ArrayList<>();// every registered regex, including ones that later fail to compile

protected Database db;
protected Scanner scanner;
protected AtomicBoolean hasCompile = new AtomicBoolean(false);
protected List<T> list = new ArrayList<>();
protected List<T> expressionContextList = new ArrayList<>();

protected List<Expression> notSupportCompileExpression = new ArrayList<>();// expressions rejected by Database.compile; matchExpression checks these through StringUtil instead
protected List<Expression> supportCompileExpression = new ArrayList<>();// registered expressions minus the ones rejected by Database.compile

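/**
 * Adds a regular expression to the set that will be compiled into the Hyperscan database.
 *
 * @param regex   the regular expression to register
 * @param context the object associated with {@code regex}; it is what
 *                {@code matchExpression} returns when this expression matches
 */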
public void addRegex(String regex, T context) {
list.add(context);
Expression expression = new Expression(regex, EnumSet.of(ExpressionFlag.UTF8, ExpressionFlag.CASELESS, ExpressionFlag.SINGLEMATCH), list.size() - 1);
regexs.add(expression);
expressionContextList.add(context);
Expression expression = new Expression(regex, EnumSet.of(ExpressionFlag.UTF8, ExpressionFlag.CASELESS, ExpressionFlag.SINGLEMATCH), expressionContextList.size() - 1);
allRegexes.add(expression);
supportCompileExpression.add(expression);
db = null;
scanner = null;
hasCompile.set(false);
Expand All @@ -53,17 +60,25 @@ public void addRegex(String regex, T context) {
* Completes compilation of the registered expressions.
*/
public void compile() {
try {
if (hasCompile.compareAndSet(false, true) && regexs.size() > 0) {
Database db = Database.compile(regexs);
if (!hasCompile.compareAndSet(false, true) || supportCompileExpression.size() == 0) {
return;
}
while (true) {
try {
if (supportCompileExpression.size() == 0) {
break;
}
Database db = Database.compile(supportCompileExpression);
Scanner scanner = new Scanner();
scanner.allocScratch(db);
this.db = db;
this.scanner = scanner;
break;
} catch (CompileErrorException e) {
Expression expression = e.getFailedExpression();
this.supportCompileExpression.remove(expression);
this.notSupportCompileExpression.add(expression);
}

} catch (Exception e) {
System.out.println("can not support this regex " + e.getMessage());
}

}
Expand All @@ -75,15 +90,14 @@ public void compile() {
* @return
*/
public boolean match(String content) {
if (scanner == null || db == null || hasCompile.get() == false) {
if (scanner == null || db == null || !hasCompile.get()) {
compile();
}
List<Match> matches = scanner.scan(db, content);
if (matches.size() > 0) {
return true;
} else {
if (content == null) {
return false;
}
List<Match> matches = scanner.scan(db, content);
return matches.size() > 0;
}

/**
Expand All @@ -93,18 +107,34 @@ public boolean match(String content) {
* @return
*/
public Set<T> matchExpression(String content) {
if (scanner == null || db == null || hasCompile.get() == false) {
if (scanner == null || db == null || !hasCompile.get()) {
compile();
}
if (content == null) {
return new HashSet<>();
}
List<Match> matches = scanner.scan(db, content);
Set<T> fireExpressions = new HashSet<>();
if (matches.size() == 0) {
return fireExpressions;
if (this.notSupportCompileExpression.size() > 0) {
for (Expression expression : this.notSupportCompileExpression) {
String regex = expression.getExpression();
boolean isMatch = StringUtil.matchRegexCaseInsensitive(content, regex);
if (isMatch) {
int index = expression.getId();
fireExpressions.add(expressionContextList.get(index));
}
}
}
for (Match match : matches) {
Integer index = match.getMatchedExpression().getId();
fireExpressions.add(list.get(index));
if (matches.size() > 0) {
for (Match match : matches) {
Integer index = match.getMatchedExpression().getId();
fireExpressions.add(expressionContextList.get(index));
}
}
return fireExpressions;
}

/**
 * Reports how many regexes are held in {@code allRegexes}, the list that
 * {@code addRegex} appends every registered expression to.
 *
 * @return the number of entries in {@code allRegexes}
 */
public int size() {
    final int registeredCount = this.allRegexes.size();
    return registeredCount;
}
}
Loading