-- Export the `lemmas` table to CSV files (later moved into the Neo4j import folder).
--
-- Notes on SELECT ... INTO OUTFILE (MySQL):
--   * The server writes the file itself and refuses to overwrite an existing
--     file, so remove earlier exports before re-running this script.
--   * The target directory must be permitted by the server's secure_file_priv
--     setting.
--   * No ESCAPED BY clause is given, so MySQL's default escape character
--     (backslash) is used inside the exported fields.

-- Distinct lemma titles.
SELECT DISTINCT title
FROM lemmas
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_title.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';

-- One row per lemma with its descriptive columns.
SELECT
    disambi,
    title,
    abstract,
    curLink,
    exterLink
FROM lemmas
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_disambi.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';

-- Distinct redirect values.
SELECT DISTINCT redirect
FROM lemmas
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_redirect.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';

-- disambi -> subject pairs; rows whose subject is empty (or NULL, because the
-- comparison is then UNKNOWN) are left out.
SELECT
    disambi,
    subject
FROM lemmas
WHERE subject <> ''
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_disambi_subject.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';

-- title -> disambi pairs.
SELECT
    title,
    disambi
FROM lemmas
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_title_disambi.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';

-- disambi -> redirect pairs; empty (or NULL) redirects are left out.
SELECT
    disambi,
    redirect
FROM lemmas
WHERE redirect <> ''
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_disambi_redirect.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';

-- disambi -> infobox pairs; empty (or NULL) infoboxes are left out.
SELECT
    disambi,
    infobox
FROM lemmas
WHERE infobox <> ''
INTO OUTFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/410_disambi_infobox.csv'
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\r\n';
而后我们将导出的csv文件转移到<NEO4J_HOME>\import文件夹内:
文件移动的 DOS 命令:
C:\Users\mmm>move C:\"ProgramData\MySQL\MySQL Server 8.0\Uploads\*.csv" C:\my\neo4j\import\
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_disambi.csv
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_disambi_infobox.csv
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_disambi_redirect.csv
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_disambi_subject.csv
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_redirect.csv
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_title.csv
C:\ProgramData\MySQL\MySQL Server 8.0\Uploads\410_title_disambi.csv
移动了 7 个文件。
import re import commands class Clean(object): @staticmethod def clean_word(word, clean_level='others'): """ Remove symbols in words :word word with unicode :clean_level keep different symbols for disambi/title :return clean word """ word = word.strip() if clean_level == "title": word = word.strip().strip("\"").replace("\n", " ").replace("\"","").strip(u"\\") elif clean_level == "subject": word = word.replace("\"", "").strip("\\").strip() elif clean_level == "redirect": word = word.strip("\"") elif clean_level == "disambi": word = re.sub(u"[,。、&∈*.↑【2—‘:“#> BFR·Z<bf≈j×~①Ⅲ⑤⑨÷〔!%》-』1→5=AE∧I/″▲; ]ξaeφi}④⑧…─☆《『0В<D∪L±γ′TXλ: dh|③⑦~、℃'〉+」/】3〕Δ’;”?■CGΨ[=μ_cgβ㈧o{②⑥'⑩。\~\!\@\#\$\%\^\&\*\(\)\_\-\+\=\{\}\[\]\\\|\:\;\'\"\.\>\?\/\,\xa0\u00a0\u3000\r\n]", "", word) elif clean_level == 'others': word = re.sub(u"[,。、&∈*.↑【2—‘:“#> BFR·Z<bf≈j×~①Ⅲ⑤⑨÷〔!%)》-』1→5=AE∧I/″▲; ]ξaeφi}④⑧…─☆(《『0В<D∪L±γ′TXλ: dh|③⑦~、℃'〉+」/】3〕Δ’;”?■CGΨ[=μ_cgβ㈧o{②⑥'⑩。\~\!\@\#\$\%\^\&\*\(\)\_\-\+\=\{\}\[\]\\\|\:\;\'\"\.\>\?\/\,\xa0\u00a0\u3000\r\n]", "", word) return word
C:\my\neo4j\import>..\bin\neo4j-import --multiline-fields=true --bad-tolerance=1000000 --into ..\data\databases\graph.db --id-type string --nodes:Title title_header.csv,410_title_new.csv --nodes:Disambi disambi_headers.csv,410_disambi_new.csv --nodes:Redirect redirect_header.csv,410_redirect.csv --nodes:Subject subject_header.csv,all_subject.csv --relationships:DisambiName title_disambi_header.csv,410_title_disambi_out.csv --relationships:RedirectName disambi_redirect_header.csv,410_disambi_redirect_out.csv --relationships:DisambiSubject disambi_subject_header.csv,disambi_subject.csv --relationships:Role disambi_infobox_header.csv,410_disambi_infobox_out.csv;
WARNING: neo4j-import is deprecated and support for it will be removed in a future version of Neo4j; please use neo4j-admin import instead.
Neo4j version: 3.5.11
Importing the contents of these files into C:\my\neo4j\data\databases\graph.db:
Nodes:
  :Title
  C:\my\neo4j\import\title_header.csv
  C:\my\neo4j\import\410_title_new.csv
Available resources:
  Total machine memory: 15.85 GB
  Free machine memory: 9.93 GB
  Max heap memory : 3.52 GB
  Processors: 8
  Configured max memory: 11.09 GB
  High-IO: false
Import starting 2019-10-29 17:05:36.714+0800
······
Imported:
  10,416,647 nodes
  37,317,167 relationships
  45,049,533 properties
Peak memory usage: 1.03 GB
There were bad entries which were skipped and logged into C:\my\neo4j\import\..\data\databases\graph.db\bad.log