Hi,
We are running JanusGraph on GCP with Bigtable as the storage backend. I have observed some query behavior that really confuses me. My guess is that batch fetching from the backend is not happening for some queries, even though I set "query.batch" to true.
To start, here is my basic query. It traces upstream from a starting node to find a subgraph.
Query 1: find the subgraph 20 levels upstream. Performance is good.
g.V().has('node', 'fqn', 'xxxx').out('contains').repeat(__.in('flowsTo')).times(20)
Query 2: traverse until there are no incoming edges. Performance is NOT good.
g.V().has('node', 'fqn', 'xxxx').out('contains').repeat(__.in('flowsTo')).until(inE().count().is(0))
Query 3: add a vertex property filter. Performance is NOT good.
g.V().has('node', 'fqn', 'xxxx').out('contains').repeat(__.in('flowsTo').has('type', 'column')).times(20)
Query 4: instead of a vertex property filter, fetch the values of the property and then filter on them. Performance is good.
g.V().has('node', 'fqn', 'xxxx').out('contains').repeat(__.in('flowsTo').as('a').values('type').is('column').select('a')).times(20)
Looking at the profile results (attached), the backend fetching behavior looks very different. It looks like queries 1 and 4 batch-fetch from the backend, but queries 2 and 3 do not.
Moreover, if I add a step such as "map", "group", or "project", the performance is also poor.
So I'm looking for some help here:
1. Is this behavior expected, or is it an issue specific to the Bigtable/HBase backend?
2. What is the expected behavior of "query.batch"? Does the behavior I observe mean that my "query.batch" setting is not taking effect?
3. Any suggestions that I can try to improve this will be greatly appreciated.
janusgraph.properties:
gremlin.graph=org.janusgraph.core.JanusGraphFactory
storage.backend: hbase
storage.directory: null
storage.hbase.ext.google.bigtable.instance.id: my-bigtable-id
storage.hbase.ext.google.bigtable.project.id: my-project-id
storage.hbase.ext.hbase.client.connection.impl: com.google.cloud.bigtable.hbase2_x.BigtableConnection
index.search.backend: elasticsearch
index.search.hostname: elasticsearch-master
index.search.directory: null
cache.db-cache: true
cache.db-cache-clean-wait: 20
cache.db-cache-time: 600000
cache.db-cache-size: 0.2
ids.block-size: 100000
ids.renew-percentage: 0.3
query.batch: true
query.batch-property-prefetch: true
metrics.enabled: false
gremlin-server.yaml:
host: 0.0.0.0
port: 8182
threadPoolWorker: 3
gremlinPool: 64
scriptEvaluationTimeout: "300000000"
channelizer: org.apache.tinkerpop.gremlin.server.channel.WebSocketChannelizer
graphs: {
graph: /etc/opt/janusgraph/janusgraph.properties
}
scriptEngines: {
gremlin-groovy: {
plugins: { org.janusgraph.graphdb.tinkerpop.plugin.JanusGraphGremlinPlugin: {},
org.apache.tinkerpop.gremlin.server.jsr223.GremlinServerGremlinPlugin: {},
org.apache.tinkerpop.gremlin.tinkergraph.jsr223.TinkerGraphGremlinPlugin: {},
org.apache.tinkerpop.gremlin.jsr223.ImportGremlinPlugin: {classImports: [java.lang.Math], methodImports: [java.lang.Math#*]},
org.apache.tinkerpop.gremlin.jsr223.ScriptFileGremlinPlugin: {files: [scripts/init.groovy]}}}}
serializers:
- { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV3d0, config: { ioRegistries: [org.janusgraph.graphdb.tinkerpop.JanusGraphIoRegistry] }}
- { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV3d0, config: { serializeResultToString: true }}
- { className: org.apache.tinkerpop.gremlin.driver.ser.GraphSONMessageSerializerV3d0, config: { ioRegistries: [org.janusgraph.graphdb.tinkerpop.JanusGraphIoRegistry] }}
processors:
- { className: org.apache.tinkerpop.gremlin.server.op.session.SessionOpProcessor, config: { sessionTimeout: 28800000, maxParameters: 256 }}
- { className: org.apache.tinkerpop.gremlin.server.op.traversal.TraversalOpProcessor, config: { cacheExpirationTime: 600000, cacheMaxSize: 1000 }}
- { className: org.apache.tinkerpop.gremlin.server.op.standard.StandardOpProcessor, config: { maxParameters: 256 }}
metrics: {
consoleReporter: {enabled: true, interval: 180000},
csvReporter: {enabled: false, interval: 180000, fileName: /tmp/gremlin-server-metrics.csv},
jmxReporter: {enabled: true},
slf4jReporter: {enabled: true, interval: 180000},
gangliaReporter: {enabled: false, interval: 180000, addressingMode: MULTICAST},
graphiteReporter: {enabled: false, interval: 180000}}
maxInitialLineLength: 4096
maxHeaderSize: 8192
maxChunkSize: 8192
maxContentLength: 10000000
maxAccumulationBufferComponents: 1024
resultIterationBatchSize: 64
writeBufferLowWaterMark: 32768
writeBufferHighWaterMark: 65536