-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_data.py
More file actions
70 lines (60 loc) · 2.23 KB
/
load_data.py
File metadata and controls
70 lines (60 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import os
import weaviate
import key_config
from langchain.embeddings import OpenAIEmbeddings
# Load the method metadata that the import loop below iterates over via
# data["methods"].  JSON text is UTF-8 by specification, so the encoding is
# pinned here instead of relying on the platform's locale default, which can
# mis-decode non-ASCII text on some systems.
with open("data/code_cu.json", encoding="utf-8") as f:
    data = json.load(f)

# Debug aid: dump the whole parsed payload.
print(f"+++Data: {data}")
# To sanity-check the load, iterate data["methods"] and print each entry's
# "method_name".
# Embedding client used to turn each method summary into a vector.
# NOTE(review): presumably the OpenAI credentials come from the environment
# (possibly populated by importing key_config) - confirm.
embeddings_model = OpenAIEmbeddings()
# To sanity-check the embedding client, embed a short string such as
# "Hi there!" and print the length of the returned vector.

# Fail fast with an actionable message when the endpoint is not configured;
# otherwise None would be handed to weaviate.Client and the resulting error
# would not mention the missing environment variable.
weaviate_url = os.environ.get("WEAVIATE_URL")
if not weaviate_url:
    raise RuntimeError(
        "WEAVIATE_URL environment variable is not set; "
        "cannot connect to Weaviate"
    )

client = weaviate.Client(weaviate_url)
print(f"+++Weaviate is ready? {client.is_ready()}")

# Configure the client's batcher with a batch size of 100 before the import
# loop starts adding objects.
client.batch.configure(batch_size=100)
# Keys copied unchanged from each input record into the stored object.
_COPIED_KEYS = (
    "method_name",
    "package_name",
    "class_name",
    "is_interface",
    "parameters",
    "invoked_methods",
    "method_desc",
)


def _build_method_summary(method):
    """Return the one-line text description that gets embedded for *method*.

    *method* is one entry of ``data["methods"]``.  The string fields
    (``method_name``, ``package_name``, ``class_name``, ``method_desc``) are
    concatenated as-is, while ``parameters`` and ``invoked_methods`` are
    serialised with ``json.dumps``.

    Raises:
        KeyError: if *method* lacks one of the fields used here.
        TypeError: if one of the concatenated fields is not a string.
    """
    # The trailing space lives in the "not " branch so that the interface
    # case does not end up with two consecutive spaces in the summary.
    interface_marker = "" if method["is_interface"] else "not "
    return "".join(
        (
            "method name is [",
            method["method_name"],
            "], package name is [",
            method["package_name"],
            "], class name is [",
            method["class_name"],
            "], this method ",
            interface_marker,
            "belongs to an interface",
            ", parameters in this method are ",
            json.dumps(method["parameters"]),
            ", invoked methods in this method are ",
            json.dumps(method["invoked_methods"]),
            ", method description is [",
            method["method_desc"],
            "]",
        )
    )


# Add one object per method to the "Codev1" class, each with an explicitly
# supplied vector computed from its summary text.
with client.batch as batch:
    for position, method in enumerate(data["methods"], start=1):
        print(f"+++importing method info: {position}")

        method_summary = _build_method_summary(method)
        properties = {key: method[key] for key in _COPIED_KEYS}
        properties["method_summary"] = method_summary

        print(f"+++embed document: {method_summary}")
        # A single document is submitted, so the vector for this summary is
        # the first (and only) element of the result.
        embeddings = embeddings_model.embed_documents([method_summary])
        print(len(embeddings))  # number of vectors returned
        print(len(embeddings[0]))  # length of the vector itself

        batch.add_data_object(properties, "Codev1", vector=embeddings[0])