(* Reads the list of names from https://mushroomobserver.org/name/observation_index *)
(* This is the web version (the API version returns too many false names). *)
(* Run mushroomobserver-web.nb to fetch the data before using this. *)
(* maintainReadMO
   Parses the saved index pages page1.htm ... page<numberOfPages>.htm found in
   <dirwork>/data/mushroomobserver/web and fills these global symbols:
     dataMO   - list of {id, content, nid} triples collected from all pages
     idMO     - idMO[name] is the list of ids collected for that name ({} by default)
     timesMO  - timesMO[name] is the sum of nid over the entries of that name (0 by default)
     namesMO  - sorted, duplicate-free list of names that match matchName
   numberOfPages is obtained by loading info.m with Get.
   dirwork, checkdir, checkfile, stringtrim and matchName are defined elsewhere.
   NOTE(review): several string literals below are empty or contain only a
   newline; it looks as if HTML markup was stripped from them. They are kept
   exactly as found - confirm against the original file. *)
maintainReadMO := Module[{url0, dirsave, fileinfo, ml, file, text, match, text1, items, match1, match2, id, nid, names, name, tab, n1, n2},
  url0 = "https://mushroomobserver.org/name/observation_index";
  dirsave = FileNameJoin[{dirwork, "data", "mushroomobserver", "web"}];
  checkdir[dirsave];
  fileinfo = FileNameJoin[{dirsave, "info.m"}];
  checkfile[fileinfo];
  (* info.m is written with Save by maintainGetMO; loading it defines the
     global numberOfPages (url0 is local here, so the saved url0 goes to the
     global symbol and does not affect the local one). *)
  Get[fileinfo];
  ml = numberOfPages;
  Print["----- Reading list of mushroom names. URL: ", url0];
  Print["Directory: ", dirsave, " Number of saved pages: ", ml];
  dataMO = Join @@ Table[
    file = FileNameJoin[{dirsave, "page" <> ToString[npage] <> ".htm"}];
    If[! FileExistsQ[file],
      (* A missing page is reported and contributes no items; previously the
         code went on to Import the nonexistent file and to run StringCases
         on the failed result. *)
      Print["Error: file ", file, " does not exist!"];
      {},
      text = Import[file, "Text"];
      (* First narrow the page down to a single region; exactly one match is expected. *)
      match =
        Shortest[
          "
"];
      text1 = StringCases[text, match -> tab];
      If[Length[text1] =!= 1,
        Print["Error: Length[text1]=!=1, file ", file];
        text = "",
        text = text1[[1]];
      ];
      (* Then extract one {id, content, nid} triple per entry; nid is the
         run of digits found between "[" and "]". *)
      match = "" ~~ Whitespace ... ~~ "" ~~ Whitespace ... ~~
        "" ~~ Shortest[content__] ~~
        "" ~~ Whitespace ... ~~ "[" ~~ nid : DigitCharacter .. ~~
        "]" ~~ Whitespace ... ~~ " | " ~~ Whitespace ... ~~ "
";
      items =
        StringCases[text,
          match :> {ToExpression[id], content, ToExpression[nid]}];
      If[items === {},
        Print["Error: items==={}, text: ", text];
      ];
      items
    ], {npage, ml}];
  (* Remove per-name definitions left over from an earlier call. Without this,
     idMO[name] and timesMO[name] would keep their old values and every
     re-run would append the same ids and add the same counts again. *)
  Clear[idMO, timesMO];
  idMO[_] := {};
  timesMO[_] := 0;
  match1 = "" ~~ Shortest[nme__] ~~ "" ~~ __;
  match2 = "" ~~ Shortest[nme__] ~~ "" ~~ __;
  namesMO = Table[
    {id, text, nid} = dat;
    (* Try the first pattern, fall back to the second; only the first match is used.
       :> keeps nme unevaluated, so a global value of nme cannot interfere. *)
    names = StringCases[text, match1 :> nme, 1];
    If[names === {}, names = StringCases[text, match2 :> nme, 1]];
    name = If[names === {}, "", names[[1]]];
    If[name =!= "",
      (* NOTE(review): idMO/timesMO are keyed by the untrimmed name, while
         namesMO stores stringtrim[name] - confirm this is intended. *)
      idMO[name] = Append[idMO[name], id];
      timesMO[name] = timesMO[name] + nid;
    ];
    stringtrim[name], {dat, dataMO}];
  (* Drop empty names and duplicates, then keep only names matching matchName. *)
  namesMO = Select[namesMO, (# =!= "") &] // Union;
  n1 = Length[namesMO];
  namesMO = Select[namesMO, StringMatchQ[#, matchName] &];
  n2 = Length[namesMO];
  Print["Found ", n1, " -> ", n2, " mushroom names."];
];
(* Can be used to fetch the data instead of mushroomobserver-web.nb. *)
(* maintainGetMO
   Downloads the name index of mushroomobserver.org into
   <dirwork>/data/mushroomobserver/web: index.htm first, then page<k>.htm for
   k = 1 ... number of pages, and finally writes info.m with the definitions
   of url0, numberOfPages and currentDate.
   Sets the global symbols url0, numberOfPages and currentDate.
   dirwork, checkdir1, checkfile and urlsave are defined elsewhere.
   NOTE(review): the string literals in matchlast look as if HTML markup was
   stripped from them; they are kept exactly as found - confirm against the
   original file. *)
maintainGetMO := Module[
  {dt, dirsave, alert, condition, file0, matchlast, text, ml,
   url, file, fileinfo},
  (* web-version *)
  (* Mathematica 11.1 *)
  dt = 5; (* Pause time *)
  (* url0 is not in the local list on purpose: Save at the end writes the
     definition of the global symbol url0; a Module-local variable would be
     saved under its renamed form (url0$nnn) instead. *)
  url0 = "https://mushroomobserver.org/name/observation_index";
  dirsave = FileNameJoin[{dirwork, "data", "mushroomobserver", "web"}];
  checkdir1[dirsave];
  (*alert="";*)
  alert = "alert-danger";
  (* Predicate handed to urlsave: true for text that does not contain the
     alert marker and is longer than 99 characters. *)
  condition = (StringFreeQ[#, alert] && StringLength[#] > 99) &;
  file0 = FileNameJoin[{dirsave, "index.htm"}];
  urlsave[url0, file0, condition];
  checkfile[file0];
  matchlast =
    "
..." ~~ Whitespace ... ~~
    "
";
  text = Import[file0, "Text"];
  (* :> keeps nlast unevaluated, so a global value of nlast cannot interfere. *)
  ml = StringCases[text, matchlast :> nlast];
  If[ml === {},
    Print["Error: no matches in file ", file0];
    ml = 0,
    ml = ml[[1]];
  ];
  (* The value comes from a downloaded page, so only a string made entirely of
     digits is passed to ToExpression; anything else is left untouched and is
     rejected by the integer check below. *)
  ml = If[StringQ[ml] && StringMatchQ[ml, DigitCharacter ..],
    ToExpression[ml],
    ml];
  If[Head[ml] =!= Integer,
    Print["Error: ml not integer: ", ml];
    ml = 0];
  Print["URL: ", url0];
  Print["Number of pages: ", numberOfPages = ml];
  Print["Current date: ", currentDate = Date[] // DateString];
  Do[
    url = url0 <> "?page=" <> ToString[npage];
    file = FileNameJoin[{dirsave, "page" <> ToString[npage] <> ".htm"}];
    (* Pause only when the page file is not on disk yet. *)
    If[FileType[file] =!= File, Pause[dt]];
    urlsave[url, file, condition], {npage, ml}];
  fileinfo = FileNameJoin[{dirsave, "info.m"}];
  (* Save appends to an existing file, so remove the old info.m first. *)
  If[FileExistsQ[fileinfo], DeleteFile[fileinfo]];
  Save[fileinfo, {url0, numberOfPages, currentDate}];
];