Date
1 - 7 of 7
How can I compute statistics, i.e. how many vertices or edges there are?
spirit...@...
My graph has about 100 million vertices and 200 million edges. But if I use the following code, it is too slow.
I want to compute the count of vertices or edges directly through HBase. The code is as follows:
I made a test by building a small graph — two vertices and two edges.
// OLTP count: forces a full scan of every vertex, O(V) — too slow at 100M+ vertices.
// An OLAP traversal (SparkGraphComputer) is the usual fix at this scale.
GraphTraversal<Vertex, Long> countV = traversal.V().count();
while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
// Same full-scan approach for edges, O(E).
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to compute the count of vertices or edges directly through HBase. The code is as follows:
// Decode the raw HBase cells of the Titan/JanusGraph edgestore (column family "e",
// taken from an HBase `result` row) into Entry objects so relations can be
// inspected without a Gremlin scan.
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
// A transaction is opened only so the serializer can resolve relation types.
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
// Deserialize one storage entry into a relation descriptor (edge or property).
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
// Look up the relation type (edge label vs. property key) for this entry.
RelationType type = tx.getExistingRelationType(relation.typeId);
// Debug aid: dump the "relType" property of edges incident to the type vertex.
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation.relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation.relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId())) {
// Count every entry whose type is an edge label.
// NOTE(review): each edge is normally stored once per endpoint vertex, and
// system/hidden relations are not filtered out here, so this count can differ
// from g.E().count() — likely the cause of the 1-vs-2 discrepancy. TODO confirm
// against the JanusGraph data-model docs.
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationTypeId(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relation.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationTypeId(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.
But I get an edge count of one when I expect two. Is there a problem? Please help... This problem bugs me. Thanks~~~
spirit...@...
anybody online? please help me~
在 2017年7月17日星期一 UTC+8下午5:53:32,spi...@...写道:
在 2017年7月17日星期一 UTC+8下午5:53:32,spi...@...写道:
My graph has about 100 million vertexes and 200 million edges. But if use the following code, it is too slow.
GraphTraversal<Vertex, Long> countV = traversal.V().count();while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to computer the count of vertex or edge directly through Hbase. The following code is:
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
RelationType type = tx.getExistingRelationType(relation.typeId);
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation. relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation. relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId() )) {
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationTypeId(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relation.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationTypeId(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.But I just get the count of the edge is one, expecting two. Is there any problem? Please help....This problem bugs me. Thanks~~~
Jason Plurad <plu...@...>
If you scroll back just a few days in the message history of this group, you'll find a link to this nice blog post: "Configuring JanusGraph for spark-yarn" https://groups.google.com/d/msg/janusgraph-users/9e82gcUTB4M/evKFnB3cAgAJ
HadoopMarc covers doing an OLAP vertex count with JanusGraph + HBase, which it sounds like what you're trying to do, and it has an example properties file.
I can't really tell what you're trying to do in that code snippet. It would be best if you would share the code publicly on GitHub or BitBucket or something similar so if somebody wanted to try it out, it would be easy to do.
> anybody online? please help me~
JanusGraph is run by volunteers contributing to the open source project. Immediate responses may not happen. Using the mailing list and searching its archive is your best bet for learning from the community because there are several hundred folks that are subscribed to this list.
HadoopMarc covers doing an OLAP vertex count with JanusGraph + HBase, which it sounds like what you're trying to do, and it has an example properties file.
I can't really tell what you're trying to do in that code snippet. It would be best if you would share the code publicly on GitHub or BitBucket or something similar so if somebody wanted to try it out, it would be easy to do.
> anybody online? please help me~
JanusGraph is run by volunteers contributing to the open source project. Immediate responses may not happen. Using the mailing list and searching its archive is your best bet for learning from the community because there are several hundred folks that are subscribed to this list.
On Monday, July 17, 2017 at 11:35:47 PM UTC-4, spirit888hill wrote:
anybody online? please help me~
在 2017年7月17日星期一 UTC+8下午5:53:32,写道:My graph has about 100 million vertexes and 200 million edges. But if use the following code, it is too slow.
GraphTraversal<Vertex, Long> countV = traversal.V().count();while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to computer the count of vertex or edge directly through Hbase. The following code is:
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
RelationType type = tx.getExistingRelationType(relation.typeId);
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation. relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation. relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId() )) {
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relati on.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.But I just get the count of the edge is one, expecting two. Is there any problem? Please help....This problem bugs me. Thanks~~~
spirit...@...
Thanks Jason for your reply.
I'll show my code on GitHub later.
And I want to know what the HadoopMarc is? You mean Hadoop MapReduce??
在 2017年7月18日星期二 UTC+8下午10:13:53,Jason Plurad写道:
在 2017年7月18日星期二 UTC+8下午10:13:53,Jason Plurad写道:
If you scroll back just a few days in the message history of this group, you'll find a link to this nice blog post: "Configuring JanusGraph for spark-yarn" https://groups.google.com/d/msg/janusgraph-users/9e82gcUTB4M/evKFnB3cAgAJ
HadoopMarc covers doing an OLAP vertex count with JanusGraph + HBase, which it sounds like what you're trying to do, and it has an example properties file.
I can't really tell what you're trying to do in that code snippet. It would be best if you would share the code publicly on GitHub or BitBucket or something similar so if somebody wanted to try it out, it would be easy to do.
> anybody online? please help me~
JanusGraph is run by volunteers contributing to the open source project. Immediate responses may not happen. Using the mailing list and searching its archive is your best bet for learning from the community because there are several hundred folks that are subscribed to this list.
On Monday, July 17, 2017 at 11:35:47 PM UTC-4, spirit888hill wrote:anybody online? please help me~
在 2017年7月17日星期一 UTC+8下午5:53:32,写道:My graph has about 100 million vertexes and 200 million edges. But if use the following code, it is too slow.
GraphTraversal<Vertex, Long> countV = traversal.V().count();while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to computer the count of vertex or edge directly through Hbase. The following code is:
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
RelationType type = tx.getExistingRelationType(relation.typeId);
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation. relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation. relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId() )) {
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relati on.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.But I just get the count of the edge is one, expecting two. Is there any problem? Please help....This problem bugs me. Thanks~~~
spirit...@...
Oooops, I'm so sorry. HadoopMarc is a person.:)
在 2017年7月18日星期二 UTC+8下午10:13:53,Jason Plurad写道:
在 2017年7月18日星期二 UTC+8下午10:13:53,Jason Plurad写道:
If you scroll back just a few days in the message history of this group, you'll find a link to this nice blog post: "Configuring JanusGraph for spark-yarn" https://groups.google.com/d/msg/janusgraph-users/ 9e82gcUTB4M/evKFnB3cAgAJ
HadoopMarc covers doing an OLAP vertex count with JanusGraph + HBase, which it sounds like what you're trying to do, and it has an example properties file.
I can't really tell what you're trying to do in that code snippet. It would be best if you would share the code publicly on GitHub or BitBucket or something similar so if somebody wanted to try it out, it would be easy to do.
> anybody online? please help me~
JanusGraph is run by volunteers contributing to the open source project. Immediate responses may not happen. Using the mailing list and searching its archive is your best bet for learning from the community because there are several hundred folks that are subscribed to this list.
On Monday, July 17, 2017 at 11:35:47 PM UTC-4, spirit888hill wrote:anybody online? please help me~
在 2017年7月17日星期一 UTC+8下午5:53:32,写道:My graph has about 100 million vertexes and 200 million edges. But if use the following code, it is too slow.
GraphTraversal<Vertex, Long> countV = traversal.V().count();while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to computer the count of vertex or edge directly through Hbase. The following code is:
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
RelationType type = tx.getExistingRelationType(relation.typeId);
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation. relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation. relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId() )) {
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relati on.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.But I just get the count of the edge is one, expecting two. Is there any problem? Please help....This problem bugs me. Thanks~~~
spirit...@...
I config the gremlin and spark-on-yarn according to the post. I lost the dependencies? or version conflicts?
在 2017年7月18日星期二 UTC+8下午10:13:53,Jason Plurad写道:
But I ran into the problem
10:47:47,199 INFO KryoShimServiceLoader:117 - Set KryoShimService provider to org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService@4b31a708 (class org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService) because its priority value (0) is the highest available10:47:47,199 INFO KryoShimServiceLoader:123 - Configuring KryoShimService provider org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService@4b31a708 with user-provided configuration10:47:51,447 INFO SparkContext:58 - Running Spark version 1.6.110:47:51,495 INFO SecurityManager:58 - Changing view acls to: rc10:47:51,496 INFO SecurityManager:58 - Changing modify acls to: rc10:47:51,496 INFO SecurityManager:58 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(rc); users with modify permissions: Set(rc)10:47:51,855 INFO Utils:58 - Successfully started service 'sparkDriver' on port 41967.10:47:52,450 INFO Slf4jLogger:80 - Slf4jLogger started10:47:52,504 INFO Remoting:74 - Starting remoting10:47:52,666 INFO Remoting:74 - Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@...:50605]10:47:52,673 INFO Utils:58 - Successfully started service 'sparkDriverActorSystem' on port 50605.10:47:53,428 INFO SparkEnv:58 - Registering MapOutputTracker10:47:53,448 INFO SparkEnv:58 - Registering BlockManagerMaster10:47:53,460 INFO DiskBlockManager:58 - Created local directory at /tmp/blockmgr-94bbe487-7cf4-4cf5-bcc2-fc538487f31a10:47:53,473 INFO MemoryStore:58 - MemoryStore started with capacity 2.4 GB10:47:53,591 INFO SparkEnv:58 - Registering OutputCommitCoordinator10:47:53,755 INFO Server:272 - jetty-8.y.z-SNAPSHOT10:47:53,809 INFO AbstractConnector:338 - Started SelectChannelConnector@0.0.0.0:404010:47:53,810 INFO Utils:58 - Successfully started service 'SparkUI' on port 4040.10:47:53,813 INFO SparkUI:58 - Started SparkUI at http://10.200.48.112:4040spark.yarn.driver.memoryOverhead is set but does not apply in 
client mode.10:47:54,996 INFO TimelineClientImpl:296 - Timeline service address: http://dl-rc-optd-ambari-master-v-test-1.host.dataengine.com:8188/ws/v1/timeline/10:47:55,307 INFO ConfiguredRMFailoverProxyProvider:100 - Failing over to rm210:47:55,333 INFO Client:58 - Requesting a new application from cluster with 8 NodeManagers10:47:55,351 INFO Client:58 - Verifying our application has not requested more than the maximum memory capability of the cluster (10240 MB per container)10:47:55,351 INFO Client:58 - Will allocate AM container, with 896 MB memory including 384 MB overhead10:47:55,352 INFO Client:58 - Setting up container launch context for our AM10:47:55,355 INFO Client:58 - Setting up the launch environment for our AM container10:47:55,367 INFO Client:58 - Preparing resources for our AM container10:47:56,298 INFO Client:58 - Uploading resource file:/rc/lib/spark_lib/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar -> hdfs://chorustest/user/rc/.sparkStaging/application_1499824261147_0015/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar10:47:59,369 INFO Client:58 - Uploading resource file:/tmp/spark-ea70c397-fad0-44bc-ae1f-7248ed3f3003/__spark_conf__1134932846047586070.zip -> hdfs://chorustest/user/rc/.sparkStaging/application_1499824261147_0015/__spark_conf__1134932846047586070.zip10:47:59,442 WARN Client:70 -hdp.version is not found,Please set HDP_VERSION=xxx in spark-env.sh,or set -Dhdp.version=xxx in spark.{driver|yarn.am}.extraJavaOptionsor set SPARK_JAVA_OPTS="-Dhdp.verion=xxx" in spark-env.shIf you're running Spark under HDP.
10:47:59,456 INFO SecurityManager:58 - Changing view acls to: rc10:47:59,456 INFO SecurityManager:58 - Changing modify acls to: rc10:47:59,456 INFO SecurityManager:58 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(rc); users with modify permissions: Set(rc)10:47:59,463 INFO Client:58 - Submitting application 15 to ResourceManager10:47:59,694 INFO YarnClientImpl:273 - Submitted application application_1499824261147_0015java.lang.NoSuchMethodError: org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.bindToYarn(Lorg/apache/hadoop/yarn/api/records/ApplicationId;Lscala/Option;)V
在 2017年7月18日星期二 UTC+8下午10:13:53,Jason Plurad写道:
If you scroll back just a few days in the message history of this group, you'll find a link to this nice blog post: "Configuring JanusGraph for spark-yarn" https://groups.google.com/d/msg/janusgraph-users/ 9e82gcUTB4M/evKFnB3cAgAJ
HadoopMarc covers doing an OLAP vertex count with JanusGraph + HBase, which it sounds like what you're trying to do, and it has an example properties file.
I can't really tell what you're trying to do in that code snippet. It would be best if you would share the code publicly on GitHub or BitBucket or something similar so if somebody wanted to try it out, it would be easy to do.
> anybody online? please help me~
JanusGraph is run by volunteers contributing to the open source project. Immediate responses may not happen. Using the mailing list and searching its archive is your best bet for learning from the community because there are several hundred folks that are subscribed to this list.
On Monday, July 17, 2017 at 11:35:47 PM UTC-4, spirit888hill wrote:anybody online? please help me~
在 2017年7月17日星期一 UTC+8下午5:53:32,写道:My graph has about 100 million vertexes and 200 million edges. But if use the following code, it is too slow.
GraphTraversal<Vertex, Long> countV = traversal.V().count();while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to computer the count of vertex or edge directly through Hbase. The following code is:
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
RelationType type = tx.getExistingRelationType(relation.typeId);
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation. relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation. relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId() )) {
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relati on.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.But I just get the count of the edge is one, expecting two. Is there any problem? Please help....This problem bugs me. Thanks~~~
spirit...@...
I run into a new problem when I config the gremlin and spark-onYarn according the post.
在 2017年7月17日星期一 UTC+8下午5:53:32,spi...@...写道:
Am I lacking the dependencies or version conflicts?
10:47:47,199 INFO KryoShimServiceLoader:117 - Set KryoShimService provider to org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService@4b31a708 (class org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService) because its priority value (0) is the highest available10:47:47,199 INFO KryoShimServiceLoader:123 - Configuring KryoShimService provider org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService@4b31a708 with user-provided configuration10:47:51,447 INFO SparkContext:58 - Running Spark version 1.6.110:47:51,495 INFO SecurityManager:58 - Changing view acls to: rc10:47:51,496 INFO SecurityManager:58 - Changing modify acls to: rc10:47:51,496 INFO SecurityManager:58 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(rc); users with modify permissions: Set(rc)10:47:51,855 INFO Utils:58 - Successfully started service 'sparkDriver' on port 41967.10:47:52,450 INFO Slf4jLogger:80 - Slf4jLogger started10:47:52,504 INFO Remoting:74 - Starting remoting10:47:52,666 INFO Remoting:74 - Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@...:50605]10:47:52,673 INFO Utils:58 - Successfully started service 'sparkDriverActorSystem' on port 50605.10:47:53,428 INFO SparkEnv:58 - Registering MapOutputTracker10:47:53,448 INFO SparkEnv:58 - Registering BlockManagerMaster10:47:53,460 INFO DiskBlockManager:58 - Created local directory at /tmp/blockmgr-94bbe487-7cf4-4cf5-bcc2-fc538487f31a10:47:53,473 INFO MemoryStore:58 - MemoryStore started with capacity 2.4 GB10:47:53,591 INFO SparkEnv:58 - Registering OutputCommitCoordinator10:47:53,755 INFO Server:272 - jetty-8.y.z-SNAPSHOT10:47:53,809 INFO AbstractConnector:338 - Started SelectChannelConnector@0.0.0.0:404010:47:53,810 INFO Utils:58 - Successfully started service 'SparkUI' on port 4040.10:47:53,813 INFO SparkUI:58 - Started SparkUI at http://10.200.48.112:4040spark.yarn.driver.memoryOverhead is set but does not apply in 
client mode.10:47:54,996 INFO TimelineClientImpl:296 - Timeline service address: http://dl-rc-optd-ambari-master-v-test-1.host.dataengine.com:8188/ws/v1/timeline/10:47:55,307 INFO ConfiguredRMFailoverProxyProvider:100 - Failing over to rm210:47:55,333 INFO Client:58 - Requesting a new application from cluster with 8 NodeManagers10:47:55,351 INFO Client:58 - Verifying our application has not requested more than the maximum memory capability of the cluster (10240 MB per container)10:47:55,351 INFO Client:58 - Will allocate AM container, with 896 MB memory including 384 MB overhead10:47:55,352 INFO Client:58 - Setting up container launch context for our AM10:47:55,355 INFO Client:58 - Setting up the launch environment for our AM container10:47:55,367 INFO Client:58 - Preparing resources for our AM container10:47:56,298 INFO Client:58 - Uploading resource file:/rc/lib/spark_lib/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar -> hdfs://chorustest/user/rc/.sparkStaging/application_1499824261147_0015/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar10:47:59,369 INFO Client:58 - Uploading resource file:/tmp/spark-ea70c397-fad0-44bc-ae1f-7248ed3f3003/__spark_conf__1134932846047586070.zip -> hdfs://chorustest/user/rc/.sparkStaging/application_1499824261147_0015/__spark_conf__1134932846047586070.zip10:47:59,442 WARN Client:70 -hdp.version is not found,Please set HDP_VERSION=xxx in spark-env.sh,or set -Dhdp.version=xxx in spark.{driver|yarn.am}.extraJavaOptionsor set SPARK_JAVA_OPTS="-Dhdp.verion=xxx" in spark-env.shIf you're running Spark under HDP.
10:47:59,456 INFO SecurityManager:58 - Changing view acls to: rc10:47:59,456 INFO SecurityManager:58 - Changing modify acls to: rc10:47:59,456 INFO SecurityManager:58 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(rc); users with modify permissions: Set(rc)10:47:59,463 INFO Client:58 - Submitting application 15 to ResourceManager10:47:59,694 INFO YarnClientImpl:273 - Submitted application application_1499824261147_0015java.lang.NoSuchMethodError: org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.bindToYarn(Lorg/apache/hadoop/yarn/api/records/ApplicationId;Lscala/Option;)V
在 2017年7月17日星期一 UTC+8下午5:53:32,spi...@...写道:
My graph has about 100 million vertexes and 200 million edges. But if use the following code, it is too slow.
GraphTraversal<Vertex, Long> countV = traversal.V().count();while (countV.hasNext()){
System.out.println("countV:" + countV.next());
}
GraphTraversal<Edge, Long> countE = traversal.E().count();
while (countE.hasNext()){
System.out.println("countE:" + countE.next());
}
I want to computer the count of vertex or edge directly through Hbase. The following code is:
SnapshotCounter.HBaseGetter entryGetter = new SnapshotCounter.HBaseGetter();
EntryList entryList = StaticArrayEntryList.ofBytes(
result.getMap().get(Bytes.toBytes("e")).entrySet(),
entryGetter);
StandardTitanTx tx = (StandardTitanTx) graph.newTransaction();
System.out.println("Entry list size: " + entryList.size());
int cnt = 0;
// IDInspector inspector = graph.getIDInspector();
for (Entry entry : entryList) {
RelationCache relation = graph.getEdgeSerializer().readRelation(entry, false, tx);
// Direction direction = graph.getEdgeSerializer().parseDirection(entry);
// System.out.println("Direction is:" + direction.name());
// System.out.println("relation is:" + relation);
// System.out.println("numProperties: " + relation.numProperties());
// Iterator<LongObjectCursor<Object>> longObjectCursorIterator = relation.propertyIterator();
// LongObjectCursor<Object> next = longObjectCursorIterator.next();
// System.out.println("key is:" + next.key);
// System.out.println("value is:" + next.value);
// System.out.println("next.toString is:" + next.toString());
RelationType type = tx.getExistingRelationType(relation.typeId);
Iterator<Edge> edgeIterator1 = type.edges(Direction.BOTH);
while (edgeIterator1.hasNext()){
Edge next11 = edgeIterator1.next();
System.out.println("relType is :" + next11.property("relType"));
}
// if (type.isEdgeLabel() && !tx.getIdInspector().isEdgeLabelId(relation. relationId)){
// if (type.isEdgeLabel() && !graph.getIDManager().isEdgeLabelId(relation. relationId) &&
// !tx.getIdInspector().isRelationTypeId(type.longId() )) {
if (type.isEdgeLabel() ) {
cnt++;
System.out.print("isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.typeId));
System.out.print("isEdgeLabelId: ");
System.out.println(graph.getIDManager().isEdgeLabelId(relati on.typeId));
System.out.print("type isEdgeLabel: ");
System.out.println(type.isEdgeLabel());
System.out.print("relationId isSystemRelationTypeId: ");
System.out.println(graph.getIDManager().isSystemRelationType Id(relation.relationId));
System.out.println(entry.getValue().toString());
}
}
System.out.println("Edge count: " + cnt);
I made a test by making a small graph-- two vertexes and two edges.But I just get the count of the edge is one, expecting two. Is there any problem? Please help....This problem bugs me. Thanks~~~