(* Reading the list of names from https://mushroomobserver.org/name/observation_index *)
(* This is the web version (the API version gives too many false names). *)
(* Use mushroomobserver-web.nb (or maintainGetMO below) to download the data before using it. *)
(* NOTE(review): several string patterns below contain empty "" pieces or bare
   newline strings. They look like HTML markers whose markup was lost in this
   copy of the file -- confirm against the original source before relying on
   the parsing. The literals are reproduced here exactly as found. *)

(* maintainReadMO
   Parses the saved index pages page1.htm .. page<numberOfPages>.htm found in
   <dirwork>/data/mushroomobserver/web and fills these global symbols:
     dataMO        - joined list of {id, content, nid} triples from all pages
     idMO[name]    - list of ids collected for a name ({} by default)
     timesMO[name] - sum of nid over all entries for a name (0 by default)
     namesMO       - Union of the non-empty names, filtered by matchName
   Uses the external symbols dirwork, checkdir, checkfile, stringtrim and
   matchName. Get on info.m is what provides numberOfPages.
   Evaluates to Null. *)
maintainReadMO := Module[
  {url0, dirsave, fileinfo, ml, file, text, match, text1, items, match1,
   match2, id, nid, names, name, tab, n1, n2},
  url0 = "https://mushroomobserver.org/name/observation_index";
  dirsave = FileNameJoin[{dirwork, "data", "mushroomobserver", "web"}];
  checkdir[dirsave];
  fileinfo = FileNameJoin[{dirsave, "info.m"}];
  checkfile[fileinfo];
  (* info.m is written by maintainGetMO via Save; loading it defines the
     global numberOfPages (and the global url0, currentDate). *)
  Get[fileinfo];
  ml = numberOfPages;
  Print["----- Reading list of mushroom names. URL: ", url0];
  Print["Directory: ", dirsave, " Number of saved pages: ", ml];
  dataMO = Join @@ Table[
     file = FileNameJoin[{dirsave, "page" <> ToString[npage] <> ".htm"}];
     (* Do not hand a missing or unreadable file to the string functions:
        fall back to empty text so the page simply contributes no items. *)
     text = If[FileExistsQ[file],
       Import[file, "Text"],
       Print["Error: file ", file, " does not exist!"]; ""];
     If[! StringQ[text],
      Print["Error: cannot read file ", file];
      text = ""];
     (* Cut out the single region of the page that holds the list. *)
     match = Shortest["" ~~ tab__ ~~ "\n"];
     (* RuleDelayed so the right-hand side is only the matched substring. *)
     text1 = StringCases[text, match :> tab];
     If[Length[text1] =!= 1,
      Print["Error: Length[text1]=!=1, file ", file];
      text = "",
      text = text1[[1]];
      ];
     (* One list entry: some content followed by "[<digits>]".
        NOTE(review): id is not bound by any named pattern visible here,
        so ToExpression[id] cannot yield a number as written -- the binding
        was presumably inside the lost markup; confirm. *)
     match = "" ~~ Whitespace ... ~~ "" ~~ Whitespace ... ~~ "" ~~
       Shortest[content__] ~~ "" ~~ Whitespace ... ~~ "[" ~~
       nid : DigitCharacter .. ~~ "]" ~~ Whitespace ... ~~ "" ~~
       Whitespace ... ~~ "";
     items = StringCases[text,
       match :> {ToExpression[id], content, ToExpression[nid]}];
     If[items === {},
      Print["Error: items==={}, text: ", text];
      ];
     items,
     {npage, ml}];
  (* Drop per-name values left over from an earlier call. Without this the
     catch-all defaults below would not override them, and a second run
     would append the same ids again and double the counts. *)
  Clear[idMO, timesMO];
  idMO[_] := {};
  timesMO[_] := 0;
  (* Two alternative layouts of the name inside an entry; match2 is tried
     only when match1 finds nothing. *)
  match1 = "" ~~ Shortest[nme__] ~~ "" ~~ __;
  match2 = "" ~~ Shortest[nme__] ~~ "" ~~ __;
  namesMO = Table[
    {id, text, nid} = dat;
    (* RuleDelayed: immune to a global value assigned to nme. *)
    names = StringCases[text, match1 :> nme, 1];
    If[names === {}, names = StringCases[text, match2 :> nme, 1]];
    name = If[names === {}, "", names[[1]]];
    (* NOTE(review): idMO/timesMO are keyed by the raw name while namesMO
       stores stringtrim[name]; verify the two always coincide. *)
    If[name =!= "",
     idMO[name] = Append[idMO[name], id];
     timesMO[name] = timesMO[name] + nid;
     ];
    stringtrim[name],
    {dat, dataMO}];
  namesMO = Select[namesMO, (# =!= "") &] // Union;
  n1 = Length[namesMO];
  namesMO = Select[namesMO, StringMatchQ[#, matchName] &];
  n2 = Length[namesMO];
  Print["Found ", n1, " -> ", n2, " mushroom names."];
  ];

(* maintainGetMO
   Can be used to get the data instead of mushroomobserver-web.nb.
   Downloads the index page and then every "?page=n" page into
   <dirwork>/data/mushroomobserver/web (index.htm, page<n>.htm) through the
   external helper urlsave, and finally rewrites info.m with the global
   symbols url0, numberOfPages and currentDate.
   Uses the external symbols dirwork, checkdir1, checkfile and urlsave.
   Evaluates to Null. *)
maintainGetMO := Module[
  {dt, dirsave, alert, condition, file0, matchlast, text, ml, url, file,
   fileinfo},
  (* web-version *)
  (* Mathematica 11.1 *)
  dt = 5; (* Pause time *)
  (* url0, numberOfPages and currentDate must stay global: Save below writes
     definitions under the symbols' own names, and a Module-local url0 would
     be saved under its renamed form url0$nnn. *)
  url0 = "https://mushroomobserver.org/name/observation_index";
  dirsave = FileNameJoin[{dirwork, "data", "mushroomobserver", "web"}];
  checkdir1[dirsave];
  (*alert="\n";*)
  alert = "alert-danger";
  (* Predicate handed to urlsave: true for text that does not contain the
     alert marker and is longer than 99 characters. *)
  condition = (StringFreeQ[#, alert] && StringLength[#] > 99) &;
  file0 = FileNameJoin[{dirsave, "index.htm"}];
  urlsave[url0, file0, condition];
  checkfile[file0];
  (* Pattern used to find the number of the last page in the index.
     NOTE(review): nlast is not bound by this pattern as it stands --
     presumably lost together with the markup; confirm. *)
  matchlast = "..." ~~ Whitespace ... ~~ "\n  • ";
  text = Import[file0, "Text"];
  ml = StringCases[text, matchlast :> nlast];
  If[ml === {},
   Print["Error: no matches in file ", file0];
   ml = 0,
   ml = ml[[1]];
   ];
  (* Only a string needs parsing; the integer fallback 0 is kept as is. *)
  If[StringQ[ml], ml = ToExpression[ml]];
  If[Head[ml] =!= Integer, Print["Error: ml not integer: ", ml]; ml = 0];
  Print["URL: ", url0];
  Print["Number of pages: ", numberOfPages = ml];
  Print["Current date: ", currentDate = Date[] // DateString];
  Do[
   url = url0 <> "?page=" <> ToString[npage];
   file = FileNameJoin[{dirsave, "page" <> ToString[npage] <> ".htm"}];
   (* Wait dt seconds before the request only when the page is not on disk
      yet. *)
   If[FileType[file] =!= File, Pause[dt]];
   urlsave[url, file, condition],
   {npage, ml}];
  fileinfo = FileNameJoin[{dirsave, "info.m"}];
  (* Save appends to an existing file, so remove the old one first. *)
  If[FileExistsQ[fileinfo], DeleteFile[fileinfo]];
  Save[fileinfo, {url0, numberOfPages, currentDate}];
  ];