(* Get Wikipedia articles *)

(* Default rule: any name without a stored first paragraph maps to "". *)
wikiEn[name_String] := "";

(* maintainWiki
   Refreshes the Wikipedia-derived data for every name in namesListSort.

   Pass 1 (English article text), for each name:
     - unless the cache file <htmlname[name]>.m exists in dirwiki, downloads
       http://en.wikipedia.org/wiki/<name with "_" for spaces> through urlsave
       and Save[]s {response, text}; then Get[]s the cache file;
     - if response[[2]] is 200 (presumably the HTTP status code), extracts the
       page title, records redirecten[name] when the title differs from name,
       and stores the cleaned, non-empty paragraphs in wikitexten[name];
     - requests the intro extract from the MediaWiki API into
       <htmlname[title]>.json and, when that extract yields at least one
       non-empty paragraph, sets wikitext1en[name];
     - sets wikiEn[name] to the first element of wikitext1en[name] when that
       is a non-empty list.

   Pass 2 (WikipediaData properties), for each name:
     - loads the cached wikipediaData definitions from dirwikiproperties, or
       queries WikipediaData for each entry of properties and Save[]s them;
     - combines "LanguagesURLRules" and "TitleTranslationRules" into rows
       {language, url, title}, de-duplicated by title and without rows whose
       title equals name or is a member of nameSynonyms[name];
     - sets foreignNames[name] to "" or to a ", "-separated list of
       "language: <a href=...>title</a>" items terminated by ".".

   Relies on symbols defined outside this chunk: dirdata, dirtemp,
   namesListSort, checkdir1, htmlname, urlsave, removetags, stringtrim,
   nPrint, nameSynonyms. All working variables are global symbols.

   NOTE(review): several pattern literals below ("" patterns, strings that
   contain only line breaks, regexes with no tag text) look as if HTML markup
   such as <p>...</p> was stripped from this copy of the file. They are
   reproduced unchanged here; restore them from the upstream source before
   relying on the scraping logic.
*)
maintainWiki := (
  Print["Getting Wikipedia data ..."];
  (* Condition passed to urlsave; true for strings longer than 10 characters. *)
  textcondit = ((StringLength[#] > 10) &);

  (* English wiki *)
  lang = "en";
  dirwiki = ToFileName[{dirdata, "wiki", lang}];
  checkdir1[dirwiki];
  Do[
    title = "";
    name1 = StringReplace[name, " " -> "_"];
    url = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1;
    file = ToFileName[dirwiki, htmlname[name] <> ".m"];

    (* Download only when no cached copy exists. *)
    If[FileType[file] =!= File,
      filetemp = ToFileName[dirtemp, "wikilookup-" <> lang <> ".htm"];
      If[FileType[filetemp] === File, DeleteFile[filetemp]];
      {response, text} = urlsave[url, filetemp, textcondit];
      Save[file, {response, text}];
    ];
    (* Defines response and text from the cache file. *)
    Get[file];

    If[response[[2]] != 200,
      (* NOTE(review): nPrint is not defined in this chunk; confirm it is a
         project helper and not a typo for Print. *)
      nPrint["Warning: URL = ", url, ": bad response: ", response[[2]]],

      (* Take the first match; $Failed (not a String) when there is none, so
         the error branch below is reached without a Part::partw message. *)
      title = Replace[StringCases[text, ""],
        {{first_, ___} :> removetags[first], _ -> $Failed}];
      If[Head[title] =!= String,
        Print["Error: Head[title]=!=String: ", name, " ", lang],

        If[title =!= name,
          Print["Redirecting: ", name, " -> ", title];
          redirecten[name] = title;
        ];
        filedata = ToFileName[dirwiki, htmlname[title] <> ".json"];

        (* Clean the downloaded page and collect its paragraphs. *)
        text1 = StringReplace[text, RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] -> ""];
        content = StringCases[text1, ""];
        content = Table[
          par1 = StringReplace[par, RegularExpression["(?i)(?s)\\s*(.*?)\\s*

"] -> "$1"];
          par1 = StringReplace[par1, RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] -> ""];
          par1 = StringReplace[par1, RegularExpression["(?i)(?s)(.*?)"] -> "$1"];
          (* Applied five times in a row, as in the original code. *)
          Do[
            par1 = StringReplace[par1, RegularExpression["(?i)(?s)(.*?)"] -> "$1"],
            {5}];
          par1 = stringtrim[par1];
          par1,
          {par, content}];
        content = Select[content, # =!= "" &];
        If[content === {},
          Print["Error wiki: content==={}: ", name, " ", lang],
          wikitexten[name] = content];

        (* Percent-encode the whole title so that characters such as "&",
           "+" or "#" cannot corrupt the query string. *)
        title1 = URLEncode[title];
        url = "https://" <> lang <>
          ".wikipedia.org/w/api.php?action=query&prop=extracts&exintro&titles=" <>
          title1 <> "&format=json";
        urlsave[url, filedata, textcondit];

        (* Better to use this, if it is defined *)
        If[FileType[filedata] === File,
          a = Import[filedata];
          If[Cases[a, "missing", Infinity, 1] === {},
            (* First "extract" value found in the imported data, or $Failed. *)
            b = Replace[Cases[a, Rule["extract", ext_] :> ext, Infinity, 1],
              {{first_, ___} :> first, _ -> $Failed}];
            If[Head[b] =!= String,
              Print["Internal error: file ", filedata,
                " incorrectly parsed. URL: ", url, " File content: ", a],

              If[! StringFreeQ[b, "This is a redirect"],
                url1 = "http://en.wikipedia.org/wiki/" <> name1;
                Print["Warning: Wikipedia: add a new redirect: ", name, ": see ",
                  Hyperlink["Wikipedia article: " <> name, url1]];
                (* Fetch the article to suggest the redirect target. *)
                filet = ToFileName[dirtemp, "wikiarticle.htm"];
                resp = URLSave[url1, filet, {"Headers", "StatusCode"}];
                If[resp[[2]] == 200,
                  txt = Import[filet, "Text"];
                  txt = StringCases[txt, "" ~~ x__ ~~ "" -> x, 1];
                  If[MatchQ[txt, {_}],
                    newname = StringTrim[StringSplit[txt[[1]], " - "][[1]]];
                    Print[" - add new line to maintain-wiki.m: \"", name,
                      "\" -> \"", newname, "\""];]];
                Print["File ", filedata, " should be deleted!"];
              ];

              (* Split the extract into paragraphs and clean each one. *)
              content = StringSplit[b, {WhitespaceCharacter___ ~~ "

" ~~ WhitespaceCharacter___ ~~ "

" ~~ WhitespaceCharacter}];
              content = Table[
                par1 = StringReplace[par, {"

" -> "", "

" -> ""}];
                par1 = stringtrim[par1];
                par1,
                {par, content}];
              content = Select[content, # =!= "" &];
              (* NOTE(review): the paragraphs parsed from the extract are only
                 used as a non-empty check; the stored value is
                 wikitexten[name]. Confirm this is intended rather than
                 "= content". *)
              If[content === {},
                Print["Error: content==={}: ", name, " ", lang],
                wikitext1en[name] = wikitexten[name]];
            ]],
          Print["Warning: file ", filedata, " not found."];
        ];
      ]; (* If[Head[title] =!= String, *)
    ]; (* If[response[[2]] != 200, *)

    (* First paragraph of Wiki *)
    wikipar = wikitext1en[name];
    If[Head[wikipar] === List && wikipar =!= {}, wikiEn[name] = wikipar[[1]]],
    {name, namesListSort}];

  (* MediaWiki's API *)
  (* Temporarily suppress messages like:
     val::shdw: Symbol val appears in multiple contexts
     {ChannelFramework`Private`,Global`}; definitions in context
     ChannelFramework`Private` may shadow or be shadowed by other definitions. *)
  Off[General::shdw];
  dirwikiproperties = ToFileName[{dirdata, "wiki", "properties"}];
  checkdir1[dirwikiproperties];
  wikipediaProperties = properties = {"ArticlePlaintext", "SummaryPlaintext",
    "LanguagesURLRules", "TitleTranslationRules"};
  Do[
    (* Start from a clean slate so only this name's data is loaded or saved. *)
    ClearAll[wikipediaData];
    file = FileNameJoin[{dirwikiproperties, htmlname[name] <> ".m"}];
    If[FileType[file] === File,
      Get[file],
      Do[
        wikipediaData[name, property] = WikipediaData[name, property],
        {property, properties}];
      Save[file, {properties, wikipediaData}];
    ];

    (* Currently, only languages-data are used *)
    a = wikipediaData[name, "LanguagesURLRules"];
    b = wikipediaData[name, "TitleTranslationRules"];
    wdata = {};
    If[MatchQ[a, List[__]] && MatchQ[b, List[__]],
      (* Turn each list of rules into {left-hand sides, right-hand sides}. *)
      a = Transpose[a /. Rule -> List];
      b = Transpose[b /. Rule -> List];
      langa = a[[1]];
      langb = b[[1]];
      If[langa === langb,
        (* Rows of {language, url, title}. *)
        wdata = {a[[1]], a[[2]], b[[2]]} // Transpose;
        wdata = Union[wdata, SameTest -> (#1[[3]] === #2[[3]] &)];
        wdata = Select[wdata, #[[3]] =!= name &];
        syns = nameSynonyms[name];
        If[Head[syns] === List,
          wdata = Select[wdata, !MemberQ[syns, #[[3]]]&]],
        Print["Wikipedia-properties: Warning: ", name,
          ": mismatch of languages: ", {langa, langb}]];
    ];

    (* Format each row with a pure function so that the global lang set in
       the first pass is not overwritten. *)
    foreignNames[name] = If[wdata === {},
      "",
      wd = Function[{code, link, fname},
        code <> ": <a href=\"" <> link <> "\">" <> fname <> "</a>"] @@@ wdata;
      StringRiffle[wd, ", "] <> "."],
    {name, namesListSort}];
  On[General::shdw];
);