(* Definitions of functions *)
(* Previously defined variables in html.mx *)
(* This file reads several symbols it does not define itself, e.g. dirwork, dir0, of,
   ifPrintNames, replnames, matchName2, transliterate, selectid, idcomponents,
   translatename, capitalize, rusfamily, caption, ncaption, noPrint. *)
(* NOTE(review): several HTML-related string literals below (in addsymb, removesymb,
   removesymb1, classlinefix, spanRead, spanReplace, getAttributes) look as if HTML
   entities/tags were decoded or stripped before this copy was made. They are kept
   exactly as received; restore them from the authoritative copy of the file. *)

ClearAll[stringtrim, porder];

(* printf[x]: string form of N[x] via NumberForm with 11 digits;
   ExponentFunction -> (Null &) is passed so that no exponent is selected. *)
printf[x_] := ToString[NumberForm[N[x], 11, ExponentFunction -> (Null &)]];

(* printstop[s, ...]: prints a fatal-error message, then asks interactively whether to exit.
   The kernel is terminated only if the Input dialog evaluates to True. *)
printstop[s__] := (
  Print["Fatal error: ", s];
  If[Input[ToString[First[{s}]] <> " --- Exiting? (True or False)", True], Exit[]];
);

(* printnames[data]: prints data in TableForm, but only when the global ifPrintNames is True. *)
printnames[data_] := (
  If[ifPrintNames === True, Print[data // TableForm]];
);

(* checkdir[dir]: calls printstop unless dir is a string naming an existing directory. *)
checkdir[dir_] :=
  If[Head[dir] =!= String,
    printstop["checkdir: " <> ToString[InputForm[dir]] <> " not a string."],
    If[FileType[dir] =!= Directory,
      printstop["checkdir: " <> dir <> " not a directory."];
    ]];

(* checkdir1[dir]: like checkdir, but creates the directory when it does not exist.
   Fix: the error message now names checkdir1 instead of checkdir. *)
checkdir1[dir_] :=
  If[Head[dir] =!= String,
    printstop["checkdir1: " <> ToString[InputForm[dir]] <> " not a string."],
    If[!DirectoryQ[dir],
      Print["Creating directory ", dir];
      CreateDirectory[dir]]];

(* checkfile[file]: calls printstop unless file is a string naming an existing file. *)
checkfile[file_] :=
  If[Head[file] =!= String,
    printstop["checkfile: " <> ToString[InputForm[file]] <> " not a string."],
    If[FileType[file] =!= File,
      printstop["checkfile: " <> file <> " not a file."];
    ]];

(* silentrun[command]: writes command into mathcommand.bat inside dirwork/temp and opens
   linkcommand.lnk with SystemOpen. Both files must already exist (checked by checkfile).
   The working directory is changed for the duration of the call and restored at the end. *)
silentrun[command_String] := Module[{rundir, batchfile, linkfile},
  rundir = ToFileName[{dirwork, "temp"}];
  checkdir1[rundir];
  SetDirectory[rundir];
  batchfile = "mathcommand.bat";
  (* It should be a shortcut to batchfile. Properties should be set as Run minimized. *)
  linkfile = "linkcommand.lnk";
  checkfile /@ {batchfile, linkfile};
  Export[batchfile, command, "Text"];
  SystemOpen[linkfile];
  ResetDirectory[];
];

(* wget[url, outfile, logfile]: downloads url into outfile by running the external wget
   program and returns the downloaded text. A warning is printed when the text is shorter
   than 10 characters. The wget log is written to a fixed temporary file; logfile is
   currently used only by the disabled batch variant kept in the comment below.
   Fix: the former polling loop waited (up to 5000 x 0.2 s) for the temporary log file to
   disappear, but nothing deletes that file any more and Run only returns after the
   command has finished, so the loop merely stalled every call. It has been removed. *)
wget[url_String, outfile_String, logfile_String] := Module[{useragent, tempfile, url1, command, text},
  useragent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9";
  tempfile = "e:\\temp\\current.log";
  (* Both files are created empty first so that unwritable paths are detected early *)
  Export[outfile, "", "Text"];
  checkfile[outfile];
  Export[tempfile, "", "Text"];
  checkfile[tempfile];
  (* DOS operating system interpretation of % *)
  url1 = StringReplace[url, "%" -> "%%"];
  command = "wget --no-check-certificate -d -t 30 -U \"" <> useragent <> "\" -O \"" <> outfile <> "\" -o \"" <> tempfile <> "\" \"" <> url1 <> "\"";
  Run[command];
  (* Disabled asynchronous variant. If it is re-enabled, a wait for the deletion of
     tempfile has to be restored as well.
  line20 = "echo --------------- >> \"" <> logfile <> "\"";
  line2 = "type \"" <> tempfile <> "\" >> \"" <> logfile <> "\"";
  line3 = "del \"" <> tempfile <> "\"";
  silentrun[command <> "\n" <> line20 <> "\n" <> line2 <> "\n" <> line3];
  *)
  text = Import[outfile, "Text"];
  If[StringLength[text] < 10, Print["Warning: wget: output is too short: ", text]];
  text];

(* proxify[url]: appends .6a.nl to the host part of an http:// URL. *)
proxify[url_String] := StringReplace[url, RegularExpression["^http://([^/]+)/"] -> "http://$1.6a.nl/"];

(* progname[f]: returns the text of the first comment found in file f, or "Unnamed" when
   there is none. Prints an error and exits the kernel if f is not a file.
   The result is also left in the global symbol pname.
   NOTE(review): the line break inside the message literal is reproduced as received. *)
progname[f_String] := If[FileType[f] =!= File,
  Print["Error: progname: File ", f, " does not exist. 
Exiting ..."]; Exit[],
  pname = StringCases[Import[f, "Text"], RegularExpression["[(][*]\\s*(.*)\\s*[*][)]"] :> "$1"];
  pname = If[pname === {}, "Unnamed", First[pname]]];

(* Path separator conversion *)
toslash[s_String] := StringReplace[s, "\\" -> "/"];
tobackslash[s_String] := StringReplace[s, "/" -> "\\"];

(* stringtrim[s]: replaces every whitespace run by one space, then removes a leading and
   a trailing space. *)
stringtrim[s_String] := StringReplace[
  StringReplace[s, Whitespace -> " "],
  {StartOfString ~~ " " -> "", " " ~~ EndOfString -> ""}];

(* flankblank[s]: removes leading and trailing whitespace only. *)
flankblank[s_String] := StringReplace[s, RegularExpression["(^\\s+|\\s+$)"] -> ""];

(* p[s]: writes the string s to the global stream of; anything else is reported. *)
p[s_] := If[Head[s] === String,
  Write[of, s],
  Print["Error: function p[]: attempt to print non-string: ", s]];

(* Adding or removing special HTML-symbols *)
(* NOTE(review): the three definitions below are reproduced byte-for-byte; their literals
   appear damaged (see the note at the top of the file) and should be restored. *)
addsymb[s_String] := StringReplace[s, { Whitespace -> " ", "<" -> "<", ">" -> ">", "&" -> "&", "\"" -> """, "'" -> "'", "_" -> " " }];
removesymb[s_String]:= StringReplace[s,{ "<" -> " ", ">" -> " ", "&" -> " ", """ -> " ", "'" -> " ", " " -> " " }];
removesymb1[s_String]:= StringReplace[s,{ "<" -> " ", ">" -> " ", "&" -> " ", """ -> " ", "'" -> " ", " " -> " ", "\n" -> " ", "<" -> " ", ">" -> " ", "&" -> " ", "\"" -> " ", "'" -> " ", "_" -> " " }];

(* shrink[s0, cleft, cright, smiddle]: shortens a string to roughly cleft leading and
   cright trailing characters, cutting at word boundaries where possible, and joins the
   two parts with smiddle. The token WORDS inside smiddle is replaced by the number of
   omitted words. Strings with fewer than cleft + cright + 2 characters (after whitespace
   normalisation) are returned unshortened. A non-string first argument gives "".
   Fix: the local list declared cpleft while the body uses spleft, so spleft leaked into
   the global context; spleft is now properly local. *)
shrink[s0_, cleft_Integer, cright_Integer, smiddle_String] := "";
shrink[s0_String, cleft_Integer, cright_Integer, smiddle_String] :=
  Module[{s, slen, ctot, sp, dc, cl, cr, spleft, spright, sleft, sright, nwords, words},
    s = StringReplace[s0, Whitespace -> " "];
    slen = StringLength[s];
    ctot = cleft + cright;
    If[ctot + 2 > slen, Return[s]];
    (* positions of words delimited by whitespace on both sides *)
    sp = StringPosition[s, RegularExpression["\\s\\S+\\s"]];
    (* reserve half of the length of smiddle on each side *)
    dc = Floor[StringLength[smiddle]/2];
    cl = Max[cleft - dc, 0];
    cr = Max[cright - dc, 0];
    spleft = Select[sp, #[[2]] <= cl &];
    spright = Select[sp, #[[1]] >= slen - cr + 1 &];
    sleft = If[spleft === {}, StringTake[s, cl], StringTake[s, Last[spleft][[2]]]];
    sright = If[spright === {}, StringTake[s, -cr], StringDrop[s, First[spright][[1]] - 1]];
    nwords = Length[sp] - Length[spleft] - Length[spright];
    words = If[nwords == 0, "", If[nwords == 1, "1 word", ToString[nwords] <> " words"]];
    sleft <> StringReplace[smiddle, "WORDS" -> words] <> sright
  ];

(* Transformation x->0x *)
fixed2[s0_]:=Module[{s},
  s=ToString[s0];
  If[StringLength[s]<2,s="0"<>s];
  s
];

(* Transformation x->00x, xx->0xx *)
fixed3[s0_]:=Module[{s},
  s=ToString[s0];
  If[StringLength[s]<3,s="0"<>s];
  If[StringLength[s]<3,s="0"<>s];
  s
];

(* stripext[s]: drops the last four characters of s. *)
stripext[s_String] := StringDrop[s, -4];

(* porder[s1, s2]: True when the first three characters of s1 and s2 (each padded on the
   right with 000) are in canonical order. *)
porder[s1_String, s2_String] := OrderedQ[{StringTake[s1 <> "000", 3], StringTake[s2 <> "000", 3]}];

(* ifprocessed[{dir, file}]: True when the corresponding .jpg exists below
   dir0/processed/img<dir>. The answer is memoised per argument. *)
ifprocessed[photo_] := Module[{dir, file, fileproc},
  {dir, file} = photo;
  fileproc = ToFileName[{dir0, "processed", "img" <> dir},
    StringReplace[file, {"dsc" -> "", "_" -> "", "-" -> ""}] <> ".jpg"];
  ifprocessed[photo] = (FileType[fileproc] === File)
];

(* hex[c]: two hexadecimal digits (upper case) of Round[255. c]. *)
hex[c_] :=(* Converting to hexadecimal form *)
  StringJoin @@ ((ToString /@ IntegerDigits[Round[255. c], 16, 2]) /.
    {"10" -> "A", "11" -> "B", "12" -> "C", "13" -> "D", "14" -> "E", "15" -> "F"});

(* shortName[name]: removes the multiplication sign and anything in angle brackets, then
   applies the regular expression below to cut the name. Returns the result if it matches
   the global pattern matchName2; otherwise prints a warning and returns "".
   NOTE(review): the line break inside the regular-expression literal is reproduced as
   received; confirm against the authoritative copy whether it belongs there. *)
shortName[name_String] := Module[{a},
  a = StringReplace[StringReplace[
    StringReplace[name, " \[Times] " -> " "],
    "<" ~~ Shortest[__] ~~ ">" -> ""], RegularExpression["(.*? .*?) 
.*"] -> "$1"];
  If[StringMatchQ[a, matchName2], a,
    Print["shortName: Warning: name ", name, " cannot be shortened."]; ""]];

(* Recently added *)

(* Removing HTML tags from the string *)
removetags[s_String] := StringReplace[s, RegularExpression["(?i)(?s)<.{0,555}?>"] -> ""] // stringtrim;

(* Saving a web page to a file, if a certain condition is satisfied *)
(* urlsave[url, file, condition]: downloads url into file unless file already exists.
   The file is deleted again when the status code is not 200 or when condition[text] is
   not True for the downloaded text. Returns {resp, text}; both stay "" when nothing was
   downloaded. The symbols useragent, requestheaders and statusCode are set globally.
   Fix: the status test used !=, which stays unevaluated for a non-numeric status (for
   example after a failed download) and silently skipped the warning; =!= is used now. *)
urlsave[url_String, file_String, condition_] := Module[{resp, text},
  {resp, text} = {"", ""};
  If[FileNameTake[file] =!= file && FileType[DirectoryName[file]] =!= Directory,
    Print["Error: urlsave[", {url, file, condition}, "]: directory for the file does not exist."],
    If[FileType[file] =!= File(* && failedAttempts[nYear] < failedAttemptsM*),
      (* main body *)
      Print["Saving URL ", url, " to the file ", file];
      useragent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9";
      (* requestheaders = If[ValueQ[requestHeaders], requestHeaders, <|"UserAgent" -> useragent|>]; *)
      (* For some unknown reason, since Jan. 19, 2020 it requires non-empty Username and Password *)
      requestheaders = <|"UserAgent" -> useragent, "Username" -> "aa", "Password" -> "bb"|>;
      resp = URLDownload[HTTPRequest[url, requestheaders], file, {"Headers", "StatusCode"}];
      statusCode = "StatusCode" /. resp;
      If[statusCode =!= 200,
        Print["Warning: URL = ", url, ": bad response: ", resp];
        (* failedAttempts[nYear] = failedAttempts[nYear] + 1;
        If[failedAttempts[nYear] == failedAttemptsM, Print["Warning: maximum download failed attempts reached for the tag: ", nYear]];*)
        If[FileType[file] === File,
          Print["File ", file, " will be deleted!"];
          Print[Short[Import[file, "Text"], 10]];
          DeleteFile[file];];];
      If[FileType[file] === File,
        text = Import[file, "TEXT"];
        If[!condition[text],
          Print["Warning: URL = ", url, ": file ", file, " will be deleted because the condition ", condition, " is not satisfied."];
          Print["Content of the file ", file, ": ", Short[text, 20]];
          DeleteFile[file];
        ],
        Print["Error: urlsave[", {url, file, condition}, "]: output file was not created."];
      ];
      (* *)
    ];
  ];
  {resp, text}];

(* Saving a web page to a file with javascripts *)
(* urlsavejs[url, namejs, file]: copies the script dirwork/namejs into C:\temp with the
   text URL replaced by url, runs it with node from that directory, and copies the
   resulting page.htm to file. Returns the byte count of page.htm, or 0 (with a warning)
   when the script produced no page.htm.
   Fix: the working directory set by SetDirectory is now restored with ResetDirectory
   before returning; previously every call left the process in C:\temp and grew the
   directory stack. The restore happens last so that path resolution inside the function
   is unchanged. *)
urlsavejs[url_String, namejs_String, file_String] :=
  Module[{dirnode, dirfile, filejs, filejs1, file1, textjs, textjs1, result},
    dirnode = "C:\\temp";
    dirfile = DirectoryName[file];
    filejs = FileNameJoin[{dirwork, namejs}];
    checkfile[filejs];
    filejs1 = FileNameJoin[{dirnode, namejs}];
    file1 = FileNameJoin[{dirnode, "page.htm"}];
    textjs = Import[filejs, "Text"];
    checkdir[dirnode];
    SetDirectory[dirnode];
    textjs1 = StringReplace[textjs, "URL" -> url];
    Export[filejs1, textjs1, "Text"];
    (* remove a stale page so that its presence afterwards proves the script ran *)
    If[FileExistsQ[file1], DeleteFile[file1]];
    Run["node " <> namejs];
    result = If[FileExistsQ[file1],
      If[FileType[dirfile] =!= Directory, CreateDirectory[dirfile]];
      If[FileExistsQ[file], DeleteFile[file]];
      CopyFile[file1, file];
      FileByteCount[file1],
      Print["Warning: urlsavejs(", url, "): file ", file1, " does not exist."];
      0];
    ResetDirectory[];
    result
  ];

(* Updating data by replacing obsolete names *)
(* The first column of data is rewritten with the global rules replnames. *)
updatenames[data_List] := Module[{datat},
  datat = Transpose[data];
  datat[[1]] = datat[[1]] /. replnames;
  Transpose[datat]];

(* HTML names without spaces *)
htmlname[s_String] := StringReplace[s, {" " -> "_", "(" -> "-", ")" -> "-"}] // ToLowerCase // transliterate;

(* Improve top line *)
toplinefix[s_String] := StringReplace[s, Whitespace~~">"~~Whitespace -> " \:203a "];

(* Improve classification line *)
(* Uses the global values kingdom, phylum, class, order, family and namehtml.
   NOTE(review): the empty strings around namehtml look like stripped markup. *)
classlinefix[s_String] := StringReplace[s, {Whitespace~~">"~~Whitespace -> ": ", "kingdom" -> kingdom, "phylum" -> phylum, "class" -> class, "order" -> order, "family" -> family, "species" -> "" <> namehtml <> ""}];

(* Reads all page fragments marked by ... *)
(* spanRead[text]: collects {label, content} pairs from text, stores each content in
   spanOld[lower-case label], resets spanNew so that it falls back to spanOld, and
   returns the list of pairs. A warning is printed when labels repeat.
   NOTE(review): the regular expression is reproduced as received and looks damaged.
   Fix: labels are taken with Part (All, 1) instead of Transpose followed by Part, which
   failed when no fragment was found. *)
spanRead[text_String] := Module[{spans, vars, varst, var, val},
  ClearAll[spanOld, spanNew];
  spanNew[s_String] := spanOld[s];
  spans = StringCases[text, RegularExpression["(?s)(.*)?"] -> {"$1", "$2"}, Overlaps -> True];
  vars = spans[[All, 1]];
  varst = Tally[vars];
  If[Length[varst] != Length[vars], Print["Warning: spanRead: Labels not unique: ", varst]];
  Do[
    {var, val} = span;
    var = ToLowerCase[var];
    spanOld[var] = val,
    {span, spans}];
  noPrint["spanRead: ", vars];
  spans];

(* Replaces first-level fragments marked by ... by the rule spanIn[XXX] -> spanOut[XXX] *)
(* spanReplace[text]: for every fragment found, substitutes spanNew[lower-case label]
   when that value is a string; otherwise a warning is printed. val1 is a global symbol.
   NOTE(review): the regular expression and the StringReplace pattern are reproduced as
   received and look damaged. Unused local symbols were removed from the Module list. *)
spanReplace[text_String] := Module[{spans, var, val, text1},
  spans = StringCases[text, RegularExpression["(?s)(.*)?"] -> {"$1", "$2"}];
  text1 = text;
  Do[
    {var, val} = span;
    Clear[val1];
    val1 = spanNew[ToLowerCase[var]];
    If[Head[val1] === String,
      text1 = StringReplace[text1, " var <> ">" <> val <> " var <> ">" -> val1],
      Print["Warning: spanReplace: spanNew[var] not defined for var = : ", var]],
    {span, spans}];
  text1];

(* getAttributes[name]: fills a set of GLOBAL variables describing the species name:
   identifiers and classification (via selectid and idcomponents), lower-case and
   underscore variants of the names, English and Russian common names (via
   translatename), family captions and links, synonym lines, common-name lines and
   keyword lists. Nothing useful is returned; callers read the globals.
   The Module formerly declared seven local symbols that were never used; they were
   removed.
   NOTE(review): the family link literals and the multi-line string literals below are
   reproduced exactly as received (including their embedded line breaks); they look like
   markup that was stripped and should be restored from the authoritative copy. *)
getAttributes[name_String] := Module[{},
  (* data[[2]] - Rus. and Eng. common names - not necessary *)
  longdash = FromCharacterCode[8211];
  id = selectid[name];
  {nameid, namehtml, synonyms, kingdom, phylum, class, order, family, genus, species} =
    idcomponents[#, id]&/@ {"name", "name_html", "synonyms", "kingdom", "phylum", "class", "order", "family", "genus", "species"};
  classline4 = classlinefix["kingdom > phylum > class > order"];
  classlinefull = classlinefix["kingdom > phylum > class > order > family > species"];

  (* Name variants *)
  nameL = ToLowerCase[name];
  nameUnd = StringReplace[name, " " -> "_"];
  nameLUnd = ToLowerCase[nameUnd];
  nameidL = ToLowerCase[nameid];
  nameidUnd = StringReplace[nameid, " " -> "_"];
  nameidLUnd = ToLowerCase[nameidUnd];
  ennames = translatename[nameid, "en"];
  runames = translatename[nameid, "ru"];
  nameen0 = If[ennames === {}, "", capitalize[ennames[[1]]] ];
  nameru0 = If[runames === {}, "", capitalize[runames[[1]]] ];
  nameen = If[ennames === {}, nameid, nameen0 <> " (" <> nameid <> ")"];
  nameru = If[runames === {}, nameid, nameru0 <> " (" <> nameid <> ")"];
  nameen1 = If[ennames === {}, name, nameid <> " " <> longdash <> " " <> nameen0];
  nameru1 = If[runames === {}, name, nameid <> " " <> longdash <> " " <> nameru0];

  (* Family *)
  familyL = ToLowerCase[family];
  nofamily = "not assigned";
  rfamily = rusfamily[family];
  familyen = familyru = ToUpperCase[family];
  If[familyL === nofamily, familyen = familyru = "NOT ASSIGNED to a family"];
  If[Head[rfamily] === String, familyru = familyru <> " - " <> rfamily];
  familylinken = If[familyL === nofamily, family, " family <> "\">" <> family <> ""];
  familylinkru = If[familyL === nofamily, family, " family <> "\">" <> family <> ""];
  classlinefullen = classlinefix["kingdom > phylum > class > order > " <> familylinken <> " > species"];
  classlinefullru = classlinefix["kingdom > phylum > class > order > " <> familylinkru <> " > species"];

  (* Synonyms *)
  msynonyms = Length[synonyms];
  synline = If[msynonyms == 0, "", If[msynonyms == 1, synonyms[[1]], StringJoin[Riffle[synonyms, ", "]] ]];
  synlineen = If[msynonyms == 0, "", If[msynonyms == 1, "

Synonym: " <> synline <> ".

", "

Synonyms: " <> synline <> ".

" ]];
  synlineru = If[msynonyms == 0, "", If[msynonyms == 1, "

\:0421\:0438\:043d\:043e\:043d\:0438\:043c: " <> synline <> ".

", "

\:0421\:0438\:043d\:043e\:043d\:0438\:043c\:044b: " <> synline <> ".

" ]];

  (* Common names *)
  cnamesen = ennames;
  mcnamesen = Length[cnamesen];
  comlineen = If[mcnamesen == 0, "", If[mcnamesen == 1, cnamesen[[1]], StringJoin[Riffle[cnamesen, ", "]] ]];
  comlineen1 = If[mcnamesen < 2, "", StringJoin[Riffle[Drop[cnamesen, 1], ", "]] ];
  cnamesru = runames;
  mcnamesru = Length[cnamesru];
  comlineru = If[mcnamesru == 0, "", If[mcnamesru == 1, cnamesru[[1]], StringJoin[Riffle[cnamesru, ", "]] ]];
  locnames = transliterate[comlineru];
  comlineru1 = If[mcnamesru < 2, "", StringJoin[Riffle[Drop[cnamesru, 1], ", "]] ];
  locline = If[mcnamesru == 0, "", If[mcnamesru == 1, "

Russian name: " <> locnames <> ".

", "

Russian names: " <> locnames <> ".

" ]];
  commlineen = If[mcnamesen == 0, "", If[mcnamesen == 1, "

Common name: " <> comlineen <> ".

", "

Common names: " <> comlineen <> ".

" ]];
  commlineru = If[mcnamesru == 0, "", If[mcnamesru == 1, "

\:0420\:0443\:0441\:0441\:043a\:043e\:0435 \:043d\:0430\:0437\:0432\ \:0430\:043d\:0438\:0435: " <> comlineru <> ".

", "

\:0420\:0443\:0441\:0441\:043a\:0438\:0435 \:043d\:0430\:0437\:0432\ \:0430\:043d\:0438\:044f: " <> comlineru <> ".

" ]];

  (* Keywords *)
  (* at most 7 common names and 3 synonyms are used *)
  allnamesen = Join[{nameid}, If[Length[ennames] < 8, ennames, Take[ennames, 7]], If[Length[synonyms] < 4, synonyms, Take[synonyms, 3]]];
  enkeywords = StringJoin[Riffle[allnamesen, ", "]];
  allnamesru = Join[{nameid}, If[Length[runames] < 8, runames, Take[runames, 7]], If[Length[synonyms] < 4, synonyms, Take[synonyms, 3]]];
  rukeywords = StringJoin[Riffle[allnamesru, ", "]];
];

(* dateS[date, s]: year, month and day of date joined with the separator s. *)
dateS[date_List, s_String] := DateString[date, {"Year", s, "Month", s, "Day"}];

(* picCaption[r, p]: trimmed caption built by ncaption and caption; memoised per (r, p). *)
picCaption[r_String, p_String] := (picCaption[r, p] = stringtrim[ncaption[caption[r, p], "; "]]);

(* picCaptionW[r, p]: the caption with every non-letter character replaced by a space.
   Hyphens standing between two letters are protected by a placeholder and kept. *)
picCaptionW[r_String, p_String] := Module[{cap, dash, x, y},
  cap = picCaption[r, p];
  dash = "DDDDDDDDDD";
  cap = StringReplace[cap, x : LetterCharacter ~~ "-" ~~ y : LetterCharacter :> x <> dash <> y];
  cap = StringReplace[cap, Except[LetterCharacter] -> " "];
  cap = StringReplace[cap, dash -> "-"];
  stringtrim[cap]
];

(* Saving list of common names to a file *)
cnameSources = {};
dirCnames = FileNameJoin[{dirwork, "cnames"}];

(* cdataSave[cdata, source]: cdata is a list of {name, data} pairs, data being a
   comma-separated string. Each pair becomes one line consisting of the name followed by
   a colon and a space, padded with spaces to 30 characters, and the cleaned-up data.
   The lines are written to <source>.dat in dirCnames and source is appended to the
   global list cnameSources. Nothing is written for an empty cdata.
   Fix: corrected the misspelling of Source in the two progress messages. *)
cdataSave[cdata_List, source_String] := Module[{sl, ndata, dirsave, filesave, lines, name, data, sln},
  sl = 30;
  ndata = Length[cdata];
  If[ndata > 0,
    cnameSources = Append[cnameSources, source];
    dirsave = dirCnames;
    checkdir1[dirsave];
    filesave = FileNameJoin[{dirsave, source <> ".dat"}];
    lines = Table[
      {name, data} = cdat;
      data = stringtrim /@ StringSplit[data, ","];
      data = StringRiffle[data, ", "];
      name = name <> ": ";
      sln = StringLength[name];
      If[sln < sl, name = StringJoin[name, Table[" ", {sl - sln}]]];
      name <> data,
      {cdat, cdata}];
    Export[filesave, lines, "Lines"];
    Print["cdataSave: wrote ", ndata, " lines. Source: ", source],
    Print["cdataSave: nothing to add. Source: ", source];
  ];
];

(* Russian names of months *)
(* Executed when the file is loaded: reads dirwork/rusmonths.txt and exits the kernel
   unless it contains exactly 12 whitespace-separated entries.
   NOTE(review): the line break inside the message literal is reproduced as received. *)
filemonths = ToFileName[dirwork, "rusmonths.txt"];
checkfile[filemonths];
rusmonths = StringSplit[Import[filemonths, "Text"]];
If[Length[rusmonths] != 12,
  Print["Error: rusmonths: ", rusmonths, ". The length should be 12. 
Exiting ..."];
  Exit[];
];

(* openOnClick[file, text]: a frameless button labelled text that opens file in notepad
   through the Windows start command when pressed. *)
openOnClick[file_String, text_String] := Button[
  Style[text, Blue, FontFamily -> "Arial", 12],
  Run["start notepad \"" <> file <> "\""],
  Appearance -> "Frameless"
];