| Line | Stmts. | Exclusive Time | Avg. |
| 1 | | | | # AWSTATS SEARCH ENGINES DATABASE
|
| 2 | | | | #------------------------------------------------------------------------------
|
| 3 | | | | # If you want to add a Search Engine to extend AWStats database detection capabilities,
|
| 4 | | | | # you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in
|
| 5 | | | | # SearchEnginesHashLib.
|
| 6 | | | | # An entry in SearchEnginesKnownUrl, if the query parameter is known, is also welcome.
|
| 7 | | | | #------------------------------------------------------------------------------
|
| 8 | | | | # $Revision: 1.43 $ - $Author: eldy $ - $Date: 2007/06/06 17:14:21 $
|
| 9 | | | |
|
| 10 | | | | # 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
|
| 11 | | | | # added minor italian search engines
|
| 12 | | | | # arianna http://arianna.libero.it/
|
| 13 | | | | # supereva http://search.supereva.com/
|
| 14 | | | | # kataweb http://kataweb.it/
|
| 15 | | | | # corrected uk looksmart
|
| 16 | | | | # 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
|
| 17 | | | | # to
|
| 18 | | | | # 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
|
| 19 | | | | # corrected spelling
|
| 20 | | | | # internationnal -> international
|
| 21 | | | | # added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to
|
| 22 | | | | # avoid counting gmail referrals as search engine traffic
|
| 23 | | | | # 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html
|
| 24 | | | | # avoid counting babelfish.altavista referrals as search engine traffic
|
| 25 | | | | # avoid counting translate.google referrals as search engine traffic
|
| 26 | | | | # 2005-11-20 Sean Carlos
|
| 27 | | | | # added missing 'tiscali','key=', entry. Check order
|
| 28 | | | | # 2005-11-22 Sean Carlos
|
| 29 | | | | # added Google Base & Froogle. Froogle not tested.
|
| 30 | | | | # 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html
|
| 31 | | | | # added biglotron.com (France)
|
| 32 | | | | # added blingo http://www.blingo.com/
|
| 33 | | | | # added Clusty & Vivisimo
|
| 34 | | | | # added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783]
|
| 35 | | | | # added GPU p2p search http://search.centraldatabase.org/
|
| 36 | | | | # added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688]
|
| 37 | | | | # added Ask group's "mysearch"
|
| 38 | | | | # added sify.com (India)
|
| 39 | | | | # added sogou.com (China) [https://sourceforge.net/forum/message.php?msg_id=3501603]
|
| 40 | | | | # Ask changes:
|
| 41 | | | | # - added Ask Japan (ask.jp)
|
| 42 | | | | # - break out Ask new country level variants (DE, ES, FR, IT, NL)
|
| 43 | | | | # - updated Ask name from Ask Jeeves
|
| 44 | | | | # - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444]
|
| 45 | | | | # - updated Ask uk (new uk.ask.com added to older ask.co.uk)
|
| 46 | | | | # updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912]
|
| 47 | | | | # for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch
|
| 48 | | | | # to AWStats to allow untranslated html. Otherwise html will appear instead of link.
|
| 49 | | | | # reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engine mentioned no longer
|
| 50 | | | | # exists https://sourceforge.net/forum/message.php?msg_id=3025426
|
| 51 | | | | # 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
|
| 52 | | | | # added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden)
|
| 53 | | | | # added Alice Internal Search (blends data with Google?) search.alice.it.master:10005
|
| 54 | | | | # added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104
|
| 55 | | | | # To do: add more extensive IP list; keywords not yet detected.
|
| 56 | | | | # added icerocket.com blog search http://www.icerocket.com/
|
| 57 | | | | # added live.com (msn) http://www.live.com/
|
| 58 | | | | # added Meta motor kartoo. Note: Kartoo does not provide search words in referrers, thus the engine will appear in the
|
| 59 | | | | # search engine list but the actual search words are not available.
|
| 60 | | | | # added netluchs.de http://www.netluchs.de/
|
| 61 | | | | # added sphere.com blog search http://www.sphere.com/
|
| 62 | | | | # added wwweasel.de http://wwweasel.de
|
| 63 | | | | # added Yahoo Mindset! http://mindset.research.yahoo.com/
|
| 64 | | | | # updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland)
|
| 65 | | | | # 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
|
| 66 | | | | # added Google cache IPs 64.233.183.104 & 66.102.7.104
|
| 67 | | | | # 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html
|
| 68 | | | | # anzwers.com.au
|
| 69 | | | | # schoenerbrausen.de http://www.schoenerbrausen.de/
|
| 70 | | | | # added Google cache IP 216.239.59.104
|
| 71 | | | | # answerbus http://www.answerbus.com/ (does not provide keywords)
|
| 72 | | | | # 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html
|
| 73 | | | | # added Google cache IP 66.102.9.104, 64.233.161.104
|
| 74 | | | | # 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html
|
| 75 | | | | # added Alice Search search.alice.it
|
| 76 | | | | # added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web"
|
| 77 | | | | # added googlee.com, variant of Google
|
| 78 | | | | # added gotuneed http://www.gotuneed.com/ Italian search engine, in beta
|
| 79 | | | | # added icq.com
|
| 80 | | | | # added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out?
|
| 81 | | | | # added Nusearch http://www.nusearch.com/
|
| 82 | | | | # added Polymeta www.polymeta.hu (does not provide keywords)
|
| 83 | | | | # added scroogle http://www.scroogle.org/ (does not always provide keywords)
|
| 84 | | | | # added Tango http://tango.hu/search.php?st=0&q=jeles+napok
|
| 85 | | | | # Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104
|
| 86 | | | | # 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104
|
| 87 | | | | # 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104
|
| 88 | | | | # 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104
|
| 89 | | | | # 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html
|
| 90 | | | | # added Onet.pl http://szukaj.onet.pl/
|
| 91 | | | | # corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/
|
| 92 | | | | # 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html
|
| 93 | | | | # Additional Polish Search Engines:
|
| 94 | | | | # added Dodaj.pl http://www.dodaj.pl/
|
| 95 | | | | # added Gazeta.pl http://szukaj.gazeta.pl/
|
| 96 | | | | # added Gery.pl http://szukaj.gery.pl/
|
| 97 | | | | # added Hoga.pl http://www.hoga.pl/
|
| 98 | | | | # added Interia.pl http://www.google.interia.pl/
|
| 99 | | | | # added Katalog.Onet.pl http://katalog.onet.pl/
|
| 100 | | | | # added NetSprint.pl http://www.netsprint.pl/
|
| 101 | | | | # added o2.pl http://szukaj2.o2.pl/
|
| 102 | | | | # added Polska http://szukaj.polska.pl/
|
| 103 | | | | # added Szukacz http://www.szukacz.pl/
|
| 104 | | | | # added Wow.pl http://szukaj.wow.pl/
|
| 105 | | | | # added Sagool http://sagool.jp/
|
| 106 | | | |
|
| 107 | | | | # 2006-08-25 Social Bookmarks
|
| 108 | | | | # International
|
| 109 | | | | # added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer?
|
| 110 | | | | # added stumbleupon.com - No keywords supplied.
|
| 111 | | | | # added swik.net
|
| 112 | | | | # added digg. Keywords sometimes supplied.
|
| 113 | | | | # Italy
|
| 114 | | | | # added segnalo.alice.it - No keywords supplied.
|
| 115 | | | | # added ineffabile.it - No keywords supplied.
|
| 116 | | | |
|
| 117 | | | | # added filter for google groups. Attempt to parse group name as keyword.
|
| 118 | | | |
|
| 119 | | | | # 2006-09-14
|
| 120 | | | | # added Eniro Sverige http://www.eniro.se/
|
| 121 | | | | # added MyWebSearch http://search.mywebsearch.com/
|
| 122 | | | | # added Teecno http://www.teecno.it/ Italian Open Source Search Engine
|
| 123 | | | |
|
| 124 | | | | #package AWSSE;
|
| 125 | | | |
|
| 126 | | | | # 2006-09-25 (Gabor Moizes)
|
| 127 | | | | # added 4-counter (Google alternative) http://4-counter.com/
|
| 128 | | | | # added Googlecom (Google alternative) http://googlecom.com/
|
| 129 | | | | # added Goggle (Google alternative) http://goggle.co.hu/
|
| 130 | | | | # added Comet toolbar http://as.starware.com
|
| 131 | | | | # added new IP for Yahoo: 216.109.125.130
|
| 132 | | | | # added Ledix http://ledix.net/
|
| 133 | | | | # added AT&T search (powered by Google) http://www.att.net/
|
| 134 | | | | # added Keresolap (Hungarian search engine) http://www.keresolap.hu/
|
| 135 | | | | # added Mozbot (French search engine) http://www.mozbot.fr/
|
| 136 | | | | # added Zoznam (Slovak search engine) http://www.zoznam.sk/
|
| 137 | | | | # added sapo.pt (Portuguese search engine) http://www.sapo.pt/
|
| 138 | | | | # added shaw.ca (powered by Google) http://start.shaw.ca/
|
| 139 | | | | # added Searchalot http://www.searchalot.com/
|
| 140 | | | | # added Copernic http://www.copernic.com/
|
| 141 | | | | # added 216.109.125.130 to Yahoo
|
| 142 | | | | # added 66.218.69.11 to Yahoo
|
| 143 | | | | # added Avantfind http://www.avantfind.com/
|
| 144 | | | | # added Steadysearch http://www.steadysearch.com/
|
| 145 | | | | # added Steadysearch http://www.steady-search.com/
|
| 146 | | | | # modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104
|
| 147 | | | |
|
| 148 | | | |
|
| 149 | | | | # SearchEnginesSearchIDOrder
|
| 150 | | | | # It contains all matching criteria to search for in log fields. This list is
|
| 151 | | | | # used to know in which order to search Search Engines IDs.
|
| 152 | | | | # The most frequent ones are in list1, used when LevelForSearchEnginesDetection is 1 or more
|
| 153 | | | | # Minor search engines are in list2, used when LevelForSearchEnginesDetection is 2 or more
|
| 154 | | | | # Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
|
| 155 | | | | #------------------------------------------------------------------------------
|
| 156 | 1 | 1.8e-5 | 1.8e-5 | @SearchEnginesSearchIDOrder_list1=(
|
| 157 | | | | # Major international search engines. Each entry is a regex ID; the same string is used as a key of %SearchEnginesHashID.
|
| 158 | | | | 'base\.google\.', # the specific Google properties are listed before the generic 'google\.' entry
|
| 159 | | | | 'froogle\.google\.',
|
| 160 | | | | 'groups\.google\.',
|
| 161 | | | | 'images\.google\.',
|
| 162 | | | | 'google\.',
|
| 163 | | | | 'googlee\.',
|
| 164 | | | | 'googlecom\.com',
|
| 165 | | | | 'goggle\.co\.hu',
|
| 166 | | | | '216\.239\.(35|37|39|51)\.100', # this and the following IP patterns are mapped to 'google_cache' in %SearchEnginesHashID
|
| 167 | | | | '216\.239\.(35|37|39|51)\.101',
|
| 168 | | | | '216\.239\.5[0-9]\.104',
|
| 169 | | | | '64\.233\.1[0-9]{2}\.104',
|
| 170 | | | | '66\.102\.[1-9]\.104',
|
| 171 | | | | '66\.249\.93\.104',
|
| 172 | | | | '72\.14\.2[0-9]{2}\.104',
|
| 173 | | | | 'msn\.',
|
| 174 | | | | 'live\.com',
|
| 175 | | | | 'voila\.',
|
| 176 | | | | 'mindset\.research\.yahoo', # listed before the generic 'yahoo\.' entry
|
| 177 | | | | 'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)', # the IP group is mapped to 'yahoo' in %SearchEnginesHashID
|
| 178 | | | | 'search\.aol\.co',
|
| 179 | | | | 'tiscali\.',
|
| 180 | | | | 'lycos\.',
|
| 181 | | | | 'alexa\.com',
|
| 182 | | | | 'alltheweb\.com',
|
| 183 | | | | 'altavista\.',
|
| 184 | | | | 'a9\.com',
|
| 185 | | | | 'dmoz\.org',
|
| 186 | | | | 'netscape\.',
|
| 187 | | | | 'search\.terra\.',
|
| 188 | | | | 'www\.search\.com',
|
| 189 | | | | 'search\.sli\.sympatico\.ca',
|
| 190 | | | | 'excite\.'
|
| 191 | | | | );
|
| 192 | | | |
|
| 193 | 1 | 6.8e-5 | 6.8e-5 | @SearchEnginesSearchIDOrder_list2=(
|
| 194 | | | | # Minor international search engines (list2 is used when LevelForSearchEnginesDetection is 2 or more)
|
| 195 | | | | '4\-counter\.com',
|
| 196 | | | | 'att\.net',
|
| 197 | | | | 'bungeebonesdotcom',
|
| 198 | | | | 'northernlight\.',
|
| 199 | | | | 'hotbot\.',
|
| 200 | | | | 'kvasir\.',
|
| 201 | | | | 'webcrawler\.',
|
| 202 | | | | 'metacrawler\.',
|
| 203 | | | | 'go2net\.com',
|
| 204 | | | | '(^|\.)go\.com',
|
| 205 | | | | 'euroseek\.',
|
| 206 | | | | 'looksmart\.',
|
| 207 | | | | 'spray\.',
|
| 208 | | | | 'nbci\.com\/search',
|
| 209 | | | | 'de\.ask\.com', # break out Ask country specific engines. (.jp is in Japan section). The dot is escaped as '\.': the former '.\com' was read as '\co' (Control-O) and never matched a host name.
|
| 210 | | | | 'es\.ask\.com',
|
| 211 | | | | 'fr\.ask\.com',
|
| 212 | | | | 'it\.ask\.com',
|
| 213 | | | | 'nl\.ask\.com',
|
| 214 | | | | 'uk\.ask\.com',
|
| 215 | | | | '(^|\.)ask\.com',
|
| 216 | | | | 'atomz\.',
|
| 217 | | | | 'overture\.com', # Replace 'goto\.com','Goto.com',
|
| 218 | | | | 'teoma\.',
|
| 219 | | | | 'findarticles\.com',
|
| 220 | | | | 'infospace\.com',
|
| 221 | | | | 'mamma\.',
|
| 222 | | | | 'dejanews\.',
|
| 223 | | | | 'dogpile\.com',
|
| 224 | | | | 'wisenut\.com',
|
| 225 | | | | 'ixquick\.com',
|
| 226 | | | | 'search\.earthlink\.net',
|
| 227 | | | | 'i-une\.com',
|
| 228 | | | | 'blingo\.com',
|
| 229 | | | | 'centraldatabase\.org',
|
| 230 | | | | 'clusty\.com',
|
| 231 | | | | 'mysearch\.',
|
| 232 | | | | 'vivisimo\.com',
|
| 233 | | | | 'kartoo\.com',
|
| 234 | | | | 'icerocket\.com',
|
| 235 | | | | 'sphere\.com',
|
| 236 | | | | 'ledix\.net',
|
| 237 | | | | 'start\.shaw\.ca',
|
| 238 | | | | 'searchalot\.com',
|
| 239 | | | | 'copernic\.com',
|
| 240 | | | | 'avantfind\.com',
|
| 241 | | | | 'steadysearch\.com',
|
| 242 | | | | 'steady-search\.com',
|
| 243 | | | | # Chello Portals
|
| 244 | | | | 'chello\.at',
|
| 245 | | | | 'chello\.be',
|
| 246 | | | | 'chello\.cz',
|
| 247 | | | | 'chello\.fr',
|
| 248 | | | | 'chello\.hu',
|
| 249 | | | | 'chello\.nl',
|
| 250 | | | | 'chello\.no',
|
| 251 | | | | 'chello\.pl',
|
| 252 | | | | 'chello\.se',
|
| 253 | | | | 'chello\.sk',
|
| 254 | | | | 'chello', # required as catchall for new countries not yet known
|
| 255 | | | | # Mirago
|
| 256 | | | | 'mirago\.be',
|
| 257 | | | | 'mirago\.ch',
|
| 258 | | | | 'mirago\.de',
|
| 259 | | | | 'mirago\.dk',
|
| 260 | | | | 'es\.mirago\.com',
|
| 261 | | | | 'mirago\.fr',
|
| 262 | | | | 'mirago\.it',
|
| 263 | | | | 'mirago\.nl',
|
| 264 | | | | 'no\.mirago\.com',
|
| 265 | | | | 'mirago\.se',
|
| 266 | | | | 'mirago\.co\.uk',
|
| 267 | | | | 'mirago', # required as catchall for new countries not yet known
|
| 268 | | | | 'answerbus\.com',
|
| 269 | | | | 'icq\.com\/search',
|
| 270 | | | | 'nusearch\.com',
|
| 271 | | | | 'goodsearch\.com',
|
| 272 | | | | 'scroogle\.org',
|
| 273 | | | | 'questionanswering\.com',
|
| 274 | | | | 'mywebsearch\.com',
|
| 275 | | | | 'as\.starware\.com',
|
| 276 | | | | # Social Bookmarking Services
|
| 277 | | | | 'del\.icio\.us',
|
| 278 | | | | 'digg\.com',
|
| 279 | | | | 'stumbleupon\.com',
|
| 280 | | | | 'swik\.net',
|
| 281 | | | | 'segnalo\.alice\.it',
|
| 282 | | | | 'ineffabile\.it',
|
| 283 | | | | # Minor Australian search engines
|
| 284 | | | | 'anzwers\.com\.au',
|
| 285 | | | | # Minor brazilian search engines
|
| 286 | | | | 'engine\.exe', 'miner\.bol\.com\.br',
|
| 287 | | | | # Minor chinese search engines
|
| 288 | | | | 'baidu\.com','search\.sina\.com','search\.sohu\.com', 'sogou\.com',
|
| 289 | | | | # Minor czech search engines
|
| 290 | | | | 'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',
|
| 291 | | | | # Minor danish search-engines
|
| 292 | | | | 'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk',
|
| 293 | | | | # Minor dutch search engines
|
| 294 | | | | 'ilse\.','vindex\.',
|
| 295 | | | | # Minor english search engines
|
| 296 | | | | '(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
|
| 297 | | | | # Minor finnish search engines
|
| 298 | | | | 'haku\.www\.fi',
|
| 299 | | | | # Minor french search engines
|
| 300 | | | | 'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr',
|
| 301 | | | | 'toile\.com', 'biglotron\.com',
|
| 302 | | | | 'mozbot\.fr',
|
| 303 | | | | # Minor german search engines
|
| 304 | | | | 'sucheaol\.aol\.de',
|
| 305 | | | | 'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de',
|
| 306 | | | | 'suchen\.abacho\.de','brisbane\.t-online\.de','allesklar\.de','meinestadt\.de',
|
| 307 | | | | '212\.227\.33\.241',
|
| 308 | | | | '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)',
|
| 309 | | | | 'wwweasel\.de',
|
| 310 | | | | 'netluchs\.de',
|
| 311 | | | | 'schoenerbrausen\.de',
|
| 312 | | | | # Minor Hungarian search engines
|
| 313 | | | | 'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
|
| 314 | | | | 'tango\.hu',
|
| 315 | | | | 'keresolap\.hu',
|
| 316 | | | | 'polymeta\.hu',
|
| 317 | | | | # Minor Indian search engines
|
| 318 | | | | 'sify\.com',
|
| 319 | | | | # Minor Italian search engines
|
| 320 | | | | 'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com',
|
| 321 | | | | 'godado','jumpy\.it','shinyseek\.it','teecno\.it',
|
| 322 | | | | # Minor Japanese search engines
|
| 323 | | | | 'ask\.jp','sagool\.jp',
|
| 324 | | | | # Minor Norwegian search engines
|
| 325 | | | | 'sok\.start\.no', 'eniro\.no',
|
| 326 | | | | # Minor Polish search engines
|
| 327 | | | | 'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl',
|
| 328 | | | | # Minor russian search engines
|
| 329 | | | | 'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru',
|
| 330 | | | | # Minor Swedish search engines
|
| 331 | | | | 'evreka\.passagen\.se','eniro\.se',
|
| 332 | | | | # Minor Slovak search engines
|
| 333 | | | | 'zoznam\.sk',
|
| 334 | | | | # Minor Portuguese search engines
|
| 335 | | | | 'sapo\.pt',
|
| 336 | | | | # Minor swiss search engines
|
| 337 | | | | 'search\.ch', 'search\.bluewin\.ch'
|
| 338 | | | | );
|
| 339 | 1 | 1.0e-6 | 1.0e-6 | @SearchEnginesSearchIDOrder_listgen=(
|
| 340 | | | | # Generic search engines: catch-all pattern, mapped to the 'search' id in %SearchEnginesHashID
|
| 341 | | | | 'search\..*\.\w+'
|
| 342 | | | | );
|
| 343 | | | |
|
| 344 | | | |
|
| 345 | | | | # NotSearchEnginesKeys
|
| 346 | | | | # If a search engine key is found, we check its exclude list to know if it's
|
| 347 | | | | # really a search engine. Keys are search IDs, values are exclusion regexes; use ONE entry per key.
|
| 348 | | | | #------------------------------------------------------------------------------
|
| 349 | 1 | 1.2e-5 | 1.2e-5 | %NotSearchEnginesKeys=(
|
| 350 | | | | 'altavista\.'=>'babelfish\.altavista\.',
|
| 351 | | | | 'google\.'=>'(mail|translate)\.google\.', # merged: a repeated hash key keeps only the last value, which dropped the gmail exclusion
|
| 352 | | | | # (gmail and translate.google referrals are both excluded by the alternation on the previous line)
|
| 353 | | | | 'msn\.'=>'hotmail\.msn\.',
|
| 354 | | | | 'tiscali\.'=>'mail\.tiscali\.',
|
| 355 | | | | 'yahoo\.'=>'mail\.yahoo\.',
|
| 356 | | | | 'yandex\.'=>'direct\.yandex\.' # NOTE(review): the visible ID lists use 'ya(ndex)?\.ru', not 'yandex\.' - confirm this key is ever looked up
|
| 357 | | | | );
|
| 358 | | | |
|
| 359 | | | |
|
| 360 | | | | # SearchEnginesHashID
|
| 361 | | | | # Each Search Engine Search ID is associated to an AWStats id string.
|
| 362 | | | | # Keys repeat the regex strings of the SearchEnginesSearchIDOrder lists (presumably looked up by exact string - keep them in sync).
|
| 363 | 1 | 0.00024 | 0.00024 | %SearchEnginesHashID = (
|
| 364 | | | | # Major international search engines
|
| 365 | | | | 'base\.google\.','google_base',
|
| 366 | | | | 'froogle\.google\.','google_froogle',
|
| 367 | | | | 'groups\.google\.','google_groups',
|
| 368 | | | | 'images\.google\.','google_image',
|
| 369 | | | | 'google\.','google',
|
| 370 | | | | 'googlee\.','google',
|
| 371 | | | | 'googlecom\.com','google',
|
| 372 | | | | 'goggle\.co\.hu','google',
|
| 373 | | | | '216\.239\.(35|37|39|51)\.100','google_cache',
|
| 374 | | | | '216\.239\.(35|37|39|51)\.101','google_cache',
|
| 375 | | | | '216\.239\.5[0-9]\.104','google_cache',
|
| 376 | | | | '64\.233\.1[0-9]{2}\.104','google_cache',
|
| 377 | | | | '66\.102\.[1-9]\.104','google_cache',
|
| 378 | | | | '66\.249\.93\.104','google_cache',
|
| 379 | | | | '72\.14\.2[0-9]{2}\.104','google_cache',
|
| 380 | | | | 'msn\.','msn',
|
| 381 | | | | 'live\.com','live',
|
| 382 | | | | 'voila\.','voila',
|
| 383 | | | | 'mindset\.research\.yahoo','yahoo_mindset',
|
| 384 | | | | 'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo',
|
| 385 | | | | 'lycos\.','lycos',
|
| 386 | | | | 'alexa\.com','alexa',
|
| 387 | | | | 'alltheweb\.com','alltheweb',
|
| 388 | | | | 'altavista\.','altavista',
|
| 389 | | | | 'a9\.com','a9',
|
| 390 | | | | 'dmoz\.org','dmoz',
|
| 391 | | | | 'netscape\.','netscape',
|
| 392 | | | | 'search\.terra\.','terra',
|
| 393 | | | | 'www\.search\.com','search.com',
|
| 394 | | | | 'tiscali\.','tiscali',
|
| 395 | | | | 'search\.aol\.co','aol',
|
| 396 | | | | 'search\.sli\.sympatico\.ca','sympatico',
|
| 397 | | | | 'excite\.','excite',
|
| 398 | | | | # Minor international search engines
|
| 399 | | | | '4\-counter\.com','google4counter',
|
| 400 | | | | 'att\.net','att',
|
| 401 | | | | 'bungeebonesdotcom','bungeebonesdotcom',
|
| 402 | | | | 'northernlight\.','northernlight',
|
| 403 | | | | 'hotbot\.','hotbot',
|
| 404 | | | | 'kvasir\.','kvasir',
|
| 405 | | | | 'webcrawler\.','webcrawler',
|
| 406 | | | | 'metacrawler\.','metacrawler',
|
| 407 | | | | 'go2net\.com','go2net',
|
| 408 | | | | '(^|\.)go\.com','go',
|
| 409 | | | | 'euroseek\.','euroseek',
|
| 410 | | | | 'looksmart\.','looksmart',
|
| 411 | | | | 'spray\.','spray',
|
| 412 | | | | 'nbci\.com\/search','nbci',
|
| 413 | | | | 'de\.ask\.com','askde', # break out Ask country specific engines. Keys corrected from '.\com' to '\.com' together with list2.
|
| 414 | | | | 'es\.ask\.com','askes',
|
| 415 | | | | 'fr\.ask\.com','askfr',
|
| 416 | | | | 'it\.ask\.com','askit',
|
| 417 | | | | 'nl\.ask\.com','asknl',
|
| 418 | | | | 'uk\.ask\.com','askuk',
|
| 419 | | | | '(^|\.)ask\.co\.uk','askuk',
|
| 420 | | | | '(^|\.)ask\.com','ask',
|
| 421 | | | | 'atomz\.','atomz',
|
| 422 | | | | 'overture\.com','overture', # Replace 'goto\.com','Goto.com',
|
| 423 | | | | 'teoma\.','teoma',
|
| 424 | | | | 'findarticles\.com','findarticles',
|
| 425 | | | | 'infospace\.com','infospace',
|
| 426 | | | | 'mamma\.','mamma',
|
| 427 | | | | 'dejanews\.','dejanews',
|
| 428 | | | | 'dogpile\.com','dogpile',
|
| 429 | | | | 'wisenut\.com','wisenut',
|
| 430 | | | | 'ixquick\.com','ixquick',
|
| 431 | | | | 'search\.earthlink\.net','earthlink',
|
| 432 | | | | 'i-une\.com','iune',
|
| 433 | | | | 'blingo\.com','blingo',
|
| 434 | | | | 'centraldatabase\.org','centraldatabase',
|
| 435 | | | | 'clusty\.com','clusty',
|
| 436 | | | | 'mysearch\.','mysearch',
|
| 437 | | | | 'vivisimo\.com','vivisimo',
|
| 438 | | | | 'kartoo\.com','kartoo',
|
| 439 | | | | 'icerocket\.com','icerocket',
|
| 440 | | | | 'sphere\.com','sphere',
|
| 441 | | | | 'ledix\.net','ledix',
|
| 442 | | | | 'start\.shaw\.ca','shawca',
|
| 443 | | | | 'searchalot\.com','searchalot',
|
| 444 | | | | 'copernic\.com','copernic',
|
| 445 | | | | 'avantfind\.com','avantfind',
|
| 446 | | | | 'steadysearch\.com','steadysearch',
|
| 447 | | | | 'steady-search\.com','steadysearch',
|
| 448 | | | | # Chello Portals
|
| 449 | | | | 'chello\.at','chelloat',
|
| 450 | | | | 'chello\.be','chellobe',
|
| 451 | | | | 'chello\.cz','chellocz',
|
| 452 | | | | 'chello\.fr','chellofr',
|
| 453 | | | | 'chello\.hu','chellohu',
|
| 454 | | | | 'chello\.nl','chellonl',
|
| 455 | | | | 'chello\.no','chellono',
|
| 456 | | | | 'chello\.pl','chellopl',
|
| 457 | | | | 'chello\.se','chellose',
|
| 458 | | | | 'chello\.sk','chellosk',
|
| 459 | | | | 'chello','chellocom',
|
| 460 | | | | # Mirago
|
| 461 | | | | 'mirago\.be','miragobe',
|
| 462 | | | | 'mirago\.ch','miragoch',
|
| 463 | | | | 'mirago\.de','miragode',
|
| 464 | | | | 'mirago\.dk','miragodk',
|
| 465 | | | | 'es\.mirago\.com','miragoes',
|
| 466 | | | | 'mirago\.fr','miragofr',
|
| 467 | | | | 'mirago\.it','miragoit',
|
| 468 | | | | 'mirago\.nl','miragonl',
|
| 469 | | | | 'no\.mirago\.com','miragono',
|
| 470 | | | | 'mirago\.se','miragose',
|
| 471 | | | | 'mirago\.co\.uk','miragocouk',
|
| 472 | | | | 'mirago','mirago', # required as catchall for new countries not yet known
|
| 473 | | | | 'answerbus\.com','answerbus',
|
| 474 | | | | 'icq\.com\/search','icq',
|
| 475 | | | | 'nusearch\.com','nusearch',
|
| 476 | | | | 'goodsearch\.com','goodsearch',
|
| 477 | | | | 'scroogle\.org','scroogle',
|
| 478 | | | | 'questionanswering\.com','questionanswering',
|
| 479 | | | | 'mywebsearch\.com','mywebsearch',
|
| 480 | | | | 'as\.starware\.com','comettoolbar',
|
| 481 | | | | # Social Bookmarking Services
|
| 482 | | | | 'del\.icio\.us','delicious',
|
| 483 | | | | 'digg\.com','digg',
|
| 484 | | | | 'stumbleupon\.com','stumbleupon',
|
| 485 | | | | 'swik\.net','swik',
|
| 486 | | | | 'segnalo\.alice\.it','segnalo',
|
| 487 | | | | 'ineffabile\.it','ineffabile',
|
| 488 | | | | # Minor Australian search engines
|
| 489 | | | | 'anzwers\.com\.au','anzwers',
|
| 490 | | | | # Minor brazilian search engines
|
| 491 | | | | 'engine\.exe','engine',
|
| 492 | | | | 'miner\.bol\.com\.br','miner',
|
| 493 | | | | # Minor chinese search engines
|
| 494 | | | | 'baidu\.com','baidu',
|
| 495 | | | | 'search\.sina\.com','sina',
|
| 496 | | | | 'search\.sohu\.com','sohu',
|
| 497 | | | | 'sogou\.com','sogou',
|
| 498 | | | | # Minor czech search engines
|
| 499 | | | | 'atlas\.cz','atlas',
|
| 500 | | | | 'seznam\.cz','seznam',
|
| 501 | | | | 'quick\.cz','quick',
|
| 502 | | | | 'centrum\.cz','centrum',
|
| 503 | | | | 'jyxo\.(cz|com)','jyxo',
|
| 504 | | | | 'najdi\.to','najdi',
|
| 505 | | | | 'redbox\.cz','redbox',
|
| 506 | | | | # Minor danish search-engines
|
| 507 | | | | 'opasia\.dk','opasia',
|
| 508 | | | | 'danielsen\.com','danielsen',
|
| 509 | | | | 'sol\.dk','sol',
|
| 510 | | | | 'jubii\.dk','jubii',
|
| 511 | | | | 'find\.dk','finddk',
|
| 512 | | | | 'edderkoppen\.dk','edderkoppen',
|
| 513 | | | | 'netstjernen\.dk','netstjernen',
|
| 514 | | | | 'orbis\.dk','orbis',
|
| 515 | | | | 'tyfon\.dk','tyfon',
|
| 516 | | | | '1klik\.dk','1klik',
|
| 517 | | | | 'ofir\.dk','ofir',
|
| 518 | | | | # Minor dutch search engines
|
| 519 | | | | 'ilse\.','ilse',
|
| 520 | | | | 'vindex\.','vindex',
|
| 521 | | | | # Minor english search engines
|
| 522 | | | | 'bbc\.co\.uk/cgi-bin/search','bbc',
|
| 523 | | | | 'ifind\.freeserve','freeserve',
|
| 524 | | | | 'looksmart\.co\.uk','looksmartuk',
|
| 525 | | | | 'splut\.','splut',
|
| 526 | | | | 'spotjockey\.','spotjockey',
|
| 527 | | | | 'ukdirectory\.','ukdirectory',
|
| 528 | | | | 'ukindex\.co\.uk','ukindex',
|
| 529 | | | | 'ukplus\.','ukplus',
|
| 530 | | | | 'searchy\.co\.uk','searchy',
|
| 531 | | | | # Minor finnish search engines
|
| 532 | | | | 'haku\.www\.fi','haku',
|
| 533 | | | | # Minor french search engines
|
| 534 | | | | 'recherche\.aol\.fr','aolfr',
|
| 535 | | | | 'ctrouve\.','ctrouve',
|
| 536 | | | | 'francite\.','francite',
|
| 537 | | | | '\.lbb\.org','lbb',
|
| 538 | | | | 'rechercher\.libertysurf\.fr','libertysurf',
|
| 539 | | | | 'search[\w\-]+\.free\.fr','free',
|
| 540 | | | | 'recherche\.club-internet\.fr','clubinternet',
|
| 541 | | | | 'toile\.com','toile',
|
| 542 | | | | 'biglotron\.com', 'biglotron',
|
| 543 | | | | 'mozbot\.fr', 'mozbot',
|
| 544 | | | | # Minor german search engines
|
| 545 | | | | 'sucheaol\.aol\.de','aolde',
|
| 546 | | | | 'fireball\.de','fireball',
|
| 547 | | | | 'infoseek\.de','infoseek',
|
| 548 | | | | 'suche\d?\.web\.de','webde',
|
| 549 | | | | '[a-z]serv\.rrzn\.uni-hannover\.de','meta',
|
| 550 | | | | 'suchen\.abacho\.de','abacho',
|
| 551 | | | | 'brisbane\.t-online\.de','t-online',
|
| 552 | | | | 'allesklar\.de','allesklar',
|
| 553 | | | | 'meinestadt\.de','meinestadt',
|
| 554 | | | | '212\.227\.33\.241','metaspinner',
|
| 555 | | | | '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de',
|
| 556 | | | | 'wwweasel\.de','wwweasel',
|
| 557 | | | | 'netluchs\.de','netluchs',
|
| 558 | | | | 'schoenerbrausen\.de','schoenerbrausen',
|
| 559 | | | | # Minor Hungarian search engines
|
| 560 | | | | 'heureka\.hu','heureka',
|
| 561 | | | | 'vizsla\.origo\.hu','origo',
|
| 562 | | | | 'lapkereso\.hu','lapkereso',
|
| 563 | | | | 'goliat\.hu','goliat',
|
| 564 | | | | 'index\.hu','indexhu',
|
| 565 | | | | 'wahoo\.hu','wahoo',
|
| 566 | | | | 'webmania\.hu','webmania',
|
| 567 | | | | 'search\.internetto\.hu','internetto',
|
| 568 | | | | 'tango\.hu','tango_hu',
|
| 569 | | | | 'keresolap\.hu','keresolap_hu',
|
| 570 | | | | 'polymeta\.hu','polymeta_hu',
|
| 571 | | | | # Minor Indian search engines
|
| 572 | | | | 'sify\.com','sify',
|
| 573 | | | | # Minor Italian search engines
|
| 574 | | | | 'virgilio\.it','virgilio',
|
| 575 | | | | 'arianna\.libero\.it','arianna',
|
| 576 | | | | 'supereva\.com','supereva',
|
| 577 | | | | 'kataweb\.it','kataweb',
|
| 578 | | | | 'search\.alice\.it\.master','aliceitmaster',
|
| 579 | | | | 'search\.alice\.it','aliceit',
|
| 580 | | | | 'gotuneed\.com','gotuneed',
|
| 581 | | | | 'godado','godado',
|
| 582 | | | | 'jumpy\.it','jumpy\.it',
|
| 583 | | | | 'shinyseek\.it','shinyseek\.it',
|
| 584 | | | | 'teecno\.it','teecnoit',
|
| 585 | | | | # Minor Japanese search engines
|
| 586 | | | | 'ask\.jp','askjp',
|
| 587 | | | | 'sagool\.jp','sagool',
|
| 588 | | | | # Minor Norwegian search engines
|
| 589 | | | | 'sok\.start\.no','start', 'eniro\.no','eniro',
|
| 590 | | | | # Minor Polish search engines
|
| 591 | | | | 'szukaj\.wp\.pl','wp',
|
| 592 | | | | 'szukaj\.onet\.pl','onetpl',
|
| 593 | | | | 'dodaj\.pl','dodajpl',
|
| 594 | | | | 'gazeta\.pl','gazetapl',
|
| 595 | | | | 'gery\.pl','gerypl',
|
| 596 | | | | 'netsprint\.pl\/hoga\-search','hogapl', 'hoga\.pl','hogapl', # 'hoga\.pl' is the ID listed in list2; it had no mapping here. The older key is kept for compatibility.
|
| 597 | | | | 'netsprint\.pl','netsprintpl',
|
| 598 | | | | 'interia\.pl','interiapl',
|
| 599 | | | | 'katalog\.onet\.pl','katalogonetpl',
|
| 600 | | | | 'o2\.pl','o2pl',
|
| 601 | | | | 'polska\.pl','polskapl',
|
| 602 | | | | 'szukacz\.pl','szukaczpl',
|
| 603 | | | | 'wow\.pl','wowpl',
|
| 604 | | | | # Minor russian search engines
|
| 605 | | | | 'ya(ndex)?\.ru','yandex',
|
| 606 | | | | 'aport\.ru','aport',
|
| 607 | | | | 'rambler\.ru','rambler',
|
| 608 | | | | 'turtle\.ru','turtle',
|
| 609 | | | | 'metabot\.ru','metabot',
|
| 610 | | | | # Minor Swedish search engines
|
| 611 | | | | 'evreka\.passagen\.se','passagen',
|
| 612 | | | | 'eniro\.se','enirose',
|
| 613 | | | | # Minor Slovak search engines
|
| 614 | | | | 'zoznam\.sk','zoznam',
|
| 615 | | | | # Minor Portuguese search engines
|
| 616 | | | | 'sapo\.pt','sapo',
|
| 617 | | | | # Minor swiss search engines
|
| 618 | | | | 'search\.ch','searchch',
|
| 619 | | | | 'search\.bluewin\.ch','bluewin',
|
| 620 | | | | # Generic search engines
|
| 621 | | | | 'search\..*\.\w+','search'
|
| 622 | | | | );
|
| 623 | | | |
|
| 624 | | | |
|
| 625 | | | | # SearchEnginesWithKeysNotInQuery
|
| 626 | | | | # List of search engines that put the keywords in the page path instead of in a query parameter
|
| 627 | | | | #------------------------------------------------------------------------------
|
| 628 | 1 | 3.0e-6 | 3.0e-6 | %SearchEnginesWithKeysNotInQuery=(
|
| 629 | | | | 'a9',1 # www.a9.com/searchkey1%20searchkey2
|
| 630 | | | | );
|
| 631 | | | |
|
| 632 | | | | # SearchEnginesKnownUrl
|
| 633 | | | | # Known rules to extract keywords from a referrer search engine URL
|
| 634 | | | | #------------------------------------------------------------------------------
|
| 635 | 1 | 0.00020 | 0.00020 | %SearchEnginesKnownUrl=(
|
| 636 | | | | # Most common search engines
|
| 637 | | | | 'alexa','q=',
|
| 638 | | | | 'alltheweb','q(|uery)=',
|
| 639 | | | | 'altavista','q=',
|
| 640 | | | | 'a9','a9\.com\/',
|
| 641 | | | | 'dmoz','search=',
|
| 642 | | | | 'google_base','(p|q|as_p|as_q)=',
|
| 643 | | | | 'google_froogle','(p|q|as_p|as_q)=',
|
| 644 | | | | 'google_groups','group\/', # does not work
|
| 645 | | | | 'google_image','(p|q|as_p|as_q)=',
|
| 646 | | | | 'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',
|
| 647 | | | | 'google','(p|q|as_p|as_q)=',
|
| 648 | | | | 'lycos','query=',
|
| 649 | | | | 'msn','q=',
|
| 650 | | | | 'live','q=',
|
| 651 | | | | 'netscape','search=',
|
| 652 | | | | 'tiscali','key=',
|
| 653 | | | | 'aol','query=',
|
| 654 | | | | 'terra','query=',
|
| 655 | | | | 'voila','(kw|rdata)=',
|
| 656 | | | | 'search.com','q=',
|
| 657 | | | | 'yahoo_mindset','p=',
|
| 658 | | | | 'yahoo','p=',
|
| 659 | | | | 'sympatico', 'query=',
|
| 660 | | | | 'excite','search=',
|
| 661 | | | | # Minor international search engines
|
| 662 | | | | 'google4counter','(p|q|as_p|as_q)=',
|
| 663 | | | | 'att','qry=',
|
| 664 | | | | 'bungeebonesdotcom','query=',
|
| 665 | | | | 'go','qt=',
|
| 666 | | | | 'askde','(ask|q)=', # break out Ask country specific engines.
|
| 667 | | | | 'askes','(ask|q)=',
|
| 668 | | | | 'askfr','(ask|q)=',
|
| 669 | | | | 'askit','(ask|q)=',
|
| 670 | | | | 'asknl','(ask|q)=',
|
| 671 | | | | 'ask','(ask|q)=',
|
| 672 | | | | 'atomz','sp-q=',
|
| 673 | | | | 'euroseek','query=',
|
| 674 | | | | 'findarticles','key=',
|
| 675 | | | | 'go2net','general=',
|
| 676 | | | | 'hotbot','mt=',
|
| 677 | | | | 'infospace','qkw=',
|
| 678 | | | | 'kvasir', 'q=',
|
| 679 | | | | 'looksmart','key=',
|
| 680 | | | | 'mamma','query=',
|
| 681 | | | | 'metacrawler','general=',
|
| 682 | | | | 'nbci','keyword=',
|
| 683 | | | | 'northernlight','qr=',
|
| 684 | | | | 'overture','keywords=',
|
| 685 | | | | 'dogpile', 'q(|kw)=',
|
| 686 | | | | 'spray','string=',
|
| 687 | | | | 'teoma','q=',
|
| 688 | | | | 'webcrawler','searchText=',
|
| 689 | | | | 'wisenut','query=',
|
| 690 | | | | 'ixquick', 'query=',
|
| 691 | | | | 'earthlink', 'q=',
|
| 692 | | | | 'iune','(keywords|q)=',
|
| 693 | | | | 'blingo','q=',
|
| 694 | | | | 'centraldatabase','query=',
|
| 695 | | | | 'clusty','query=',
|
| 696 | | | | 'mysearch','searchfor=',
|
| 697 | | | | 'vivisimo','query=',
|
| 698 | | | | # kartoo: No keywords passed in referring URL.
|
| 699 | | | | 'kartoo','',
|
| 700 | | | | 'icerocket','q=',
|
| 701 | | | | 'sphere','q=',
|
| 702 | | | | 'ledix','q=',
|
| 703 | | | | 'shawca','q=',
|
| 704 | | | | 'searchalot','q=',
|
| 705 | | | | 'copernic','web\/',
|
| 706 | | | | 'avantfind','keywords=',
|
| 707 | | | | 'steadysearch','w=',
|
| 708 | | | | # Chello Portals
|
| 709 | | | | 'chelloat','q1=',
|
| 710 | | | | 'chellobe','q1=',
|
| 711 | | | | 'chellocz','q1=',
|
| 712 | | | | 'chellofr','q1=',
|
| 713 | | | | 'chellohu','q1=',
|
| 714 | | | | 'chellonl','q1=',
|
| 715 | | | | 'chellono','q1=',
|
| 716 | | | | 'chellopl','q1=',
|
| 717 | | | | 'chellose','q1=',
|
| 718 | | | | 'chellosk','q1=',
|
| 719 | | | | 'chellocom','q1=',
|
| 720 | | | | # Mirago
|
| 721 | | | | 'miragobe','(txtsearch|qry)=',
|
| 722 | | | | 'miragoch','(txtsearch|qry)=',
|
| 723 | | | | 'miragode','(txtsearch|qry)=',
|
| 724 | | | | 'miragodk','(txtsearch|qry)=',
|
| 725 | | | | 'miragoes','(txtsearch|qry)=',
|
| 726 | | | | 'miragofr','(txtsearch|qry)=',
|
| 727 | | | | 'miragoit','(txtsearch|qry)=',
|
| 728 | | | | 'miragonl','(txtsearch|qry)=',
|
| 729 | | | | 'miragono','(txtsearch|qry)=',
|
| 730 | | | | 'miragose','(txtsearch|qry)=',
|
| 731 | | | | 'miragocouk','(txtsearch|qry)=',
|
| 732 | | | | 'mirago','(txtsearch|qry)=',
|
| 733 | | | | 'answerbus','', # Does not provide query parameters
|
| 734 | | | | 'icq','q=',
|
| 735 | | | | 'nusearch','nusearch_terms=',
|
| 736 | | | | 'goodsearch','Keywords=',
|
| 737 | | | | 'scroogle','Gw=', # Does not always provide query parameters
|
| 738 | | | | 'questionanswering','',
|
| 739 | | | | 'mywebsearch','searchfor=',
|
| 740 | | | | 'comettoolbar','qry=',
|
| 741 | | | | # Social Bookmarking Services
|
| 742 | | | | 'delicious','all=',
|
| 743 | | | | 'digg','s=',
|
| 744 | | | | 'stumbleupon','',
|
| 745 | | | | 'swik','swik\.net/', # does not work. Keywords follow domain, e.g. http://swik.net/awstats+analytics
|
| 746 | | | | 'segnalo','',
|
| 747 | | | | 'ineffabile','',
|
| 748 | | | | # Minor Australian search engines
|
| 749 | | | | 'anzwers','search=',
|
| 750 | | | | # Minor brazilian search engines
|
| 751 | | | | 'engine','p1=', 'miner','q=',
|
| 752 | | | | # Minor chinese search engines
|
| 753 | | | | 'baidu','word=', 'sina', 'word=', 'sohu','word=', 'sogou', 'query=',
|
| 754 | | | | # Minor czech search engines
|
| 755 | | | | 'atlas','searchtext=', 'seznam','w=', 'quick','query=', 'centrum','q=', 'jyxo','s=', 'najdi','dotaz=', 'redbox','srch=',
|
| 756 | | | | # Minor danish search engines
|
| 757 | | | | 'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 'ofir','querytext=',
|
| 758 | | | | # Minor dutch search engines
|
| 759 | | | | 'ilse','search_for=', 'vindex','in=',
|
| 760 | | | | # Minor english search engines
|
| 761 | | | | 'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
|
| 762 | | | | 'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',
|
| 763 | | | | # Minor finnish search engines
|
| 764 | | | | 'haku','w=',
|
| 765 | | | | # Minor french search engines
|
| 766 | | | | 'francite','name=', 'clubinternet', 'q=',
|
| 767 | | | | 'toile', 'q=',
|
| 768 | | | | 'biglotron','question=',
|
| 769 | | | | 'mozbot','q=',
|
| 770 | | | | # Minor german search engines
|
| 771 | | | | 'aolde','q=',
|
| 772 | | | | 'fireball','q=', 'infoseek','qt=', 'webde','su=',
|
| 773 | | | | 'abacho','q=', 't-online','q=',
|
| 774 | | | | 'metaspinner','qry=',
|
| 775 | | | | 'metacrawler_de','qry=',
|
| 776 | | | | 'wwweasel','q=',
|
| 777 | | | | 'netluchs','query=',
|
| 778 | | | | 'schoenerbrausen','q=',
|
| 779 | | | | # Minor Hungarian search engines
|
| 780 | | | | 'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
|
| 781 | | | | 'keresolap_hu','q=',
|
| 782 | | | | 'tango_hu','q=',
|
| 783 | | | | 'polymeta_hu','',
|
| 784 | | | | # Minor Indian search engines
|
| 785 | | | | 'sify','keyword=',
|
| 786 | | | | # Minor Italian search engines
|
| 787 | | | | 'virgilio','qs=',
|
| 788 | | | | 'arianna','query=',
|
| 789 | | | | 'supereva','q=',
|
| 790 | | | | 'kataweb','q=',
|
| 791 | | | | 'aliceitmaster','qs=',
|
| 792 | | | | 'aliceit','qs=',
|
| 793 | | | | 'gotuneed','', # Not yet known
|
| 794 | | | | 'godado','Keywords=',
|
| 795 | | | | 'jumpy\.it','searchWord=',
|
| 796 | | | | 'shinyseek\.it','KEY=',
|
| 797 | | | | 'teecnoit','q=',
|
| 798 | | | | # Minor Japanese search engines
|
| 799 | | | | 'askjp','(ask|q)=',
|
| 800 | | | | 'sagool','q=',
|
| 801 | | | | # Minor Norwegian search engines
|
| 802 | | | | 'start','q=', 'eniro','q=',
|
| 803 | | | | # Minor Polish search engines
|
| 804 | | | | 'wp','szukaj=',
|
| 805 | | | | 'onetpl','qt=',
|
| 806 | | | | 'dodajpl','keyword=',
|
| 807 | | | | 'gazetapl','slowo=',
|
| 808 | | | | 'gerypl','q=',
|
| 809 | | | | 'hogapl','qt=',
|
| 810 | | | | 'netsprintpl','q=',
|
| 811 | | | | 'interiapl','q=',
|
| 812 | | | | 'katalogonetpl','qt=',
|
| 813 | | | | 'o2pl','qt=',
|
| 814 | | | | 'polskapl','qt=',
|
| 815 | | | | 'szukaczpl','q=',
|
| 816 | | | | 'wowpl','q=',
|
| 817 | | | | # Minor russian search engines
|
| 818 | | | | 'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 'metabot', 'st=',
|
| 819 | | | | # Minor swedish search engines
|
| 820 | | | | 'passagen','q=',
|
| 821 | | | | 'enirose','q=',
|
| 822 | | | | # Minor swiss search engines
|
| 823 | | | | 'searchch', 'q=', 'bluewin', 'qry='
|
| 824 | | | | );
|
| 825 | | | |
|
| 826 | | | | # SearchEnginesKnownUrlNotFound
|
| 827 | | | | # Known rules to extract not found keywords from a referrer search engine URL
|
| 828 | | | | #------------------------------------------------------------------------------
|
| 829 | 1 | 3.0e-6 | 3.0e-6 | %SearchEnginesKnownUrlNotFound=(
|
| 830 | | | | # Most common search engines
|
| 831 | | | | 'msn','origq='
|
| 832 | | | | );
|
| 833 | | | |
|
| 834 | | | | # If no rules are known, WordsToExtractSearchUrl will be used to search keyword parameter
|
| 835 | | | | # If no rules are known and search in WordsToExtractSearchUrl failed, this will be used to clean URL of not keyword parameters.
|
| 836 | | | | #------------------------------------------------------------------------------
|
| 837 | 1 | 1.3e-5 | 1.3e-5 | @WordsToExtractSearchUrl= ('ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w=');
|
| 838 | 1 | 2.6e-5 | 2.6e-5 | @WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=');
|
| 839 | | | |
|
| 840 | | | | # SearchEnginesKnownUTFCoding
|
| 841 | | | | # Known parameter that proves a search engine has coded its parameters in UTF-8
|
| 842 | | | | #------------------------------------------------------------------------------
|
| 843 | 1 | 3.0e-6 | 3.0e-6 | %SearchEnginesKnownUTFCoding=(
|
| 844 | | | | # Most common search engines
|
| 845 | | | | 'google','ie=utf-8',
|
| 846 | | | | 'alltheweb','cs=utf-8'
|
| 847 | | | | );
|
| 848 | | | |
|
| 849 | | | |
|
| 850 | | | | # SearchEnginesHashLib
|
| 851 | | | | # List of search engines names
|
| 852 | | | | # 'search_engine_id', 'search_engine_name',
|
| 853 | | | | #------------------------------------------------------------------------------
|
| 854 | 1 | 0.00016 | 0.00016 | %SearchEnginesHashLib=(
|
| 855 | | | | # Major international search engines
|
| 856 | | | | 'alexa','<a href="http://www.alexa.com/" title="Search Engine Home Page [new window]" target="_blank">Alexa</a>',
|
| 857 | | | | 'alltheweb','<a href="http://www.alltheweb.com/" title="Search Engine Home Page [new window]" target="_blank">AllTheWeb</a>',
|
| 858 | | | | 'altavista','<a href="http://www.altavista.com/" title="Search Engine Home Page [new window]" target="_blank">AltaVista</a>',
|
| 859 | | | | 'a9', '<a href="http://www.a9.com/" title="Search Engine Home Page [new window]" target="_blank">A9</a>',
|
| 860 | | | | 'dmoz','<a href="http://dmoz.org/" title="Search Engine Home Page [new window]" target="_blank">DMOZ</a>',
|
| 861 | | | | 'google_base','<a href="http://base.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google (Base)</a>',
|
| 862 | | | | 'google_froogle','<a href="http://froogle.google.com/" title="Search Engine Home Page [new window]" target="_blank">Froogle (Google)</a>',
|
| 863 | | | | 'google_groups','<a href="http://groups.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google (Groups)</a>',
|
| 864 | | | | 'google_image','<a href="http://images.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google (Images)</a>',
|
| 865 | | | | 'google_cache','<a href="http://www.google.com/help/features.html#cached" title="Search Engine Home Page [new window]" target="_blank">Google (cache)</a>',
|
| 866 | | | | 'google','<a href="http://www.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google</a>',
|
| 867 | | | | 'lycos','<a href="http://www.lycos.com/" title="Search Engine Home Page [new window]" target="_blank">Lycos</a>',
|
| 868 | | | | 'msn','<a href="http://search.msn.com/" title="Search Engine Home Page [new window]" target="_blank">MSN Search</a>',
|
| 869 | | | | 'live','<a href="http://www.live.com/" title="Search Engine Home Page [new window]" target="_blank">Windows Live</a>',
|
| 870 | | | | 'netscape','<a href="http://www.netscape.com/" title="Search Engine Home Page [new window]" target="_blank">Netscape</a>',
|
| 871 | | | | 'aol','<a href="http://www.aol.com/" title="Search Engine Home Page [new window]" target="_blank">AOL</a>',
|
| 872 | | | | 'terra','<a href="http://www.terra.es/" title="Search Engine Home Page [new window]" target="_blank">Terra</a>',
|
| 873 | | | | 'tiscali','<a href="http://search.tiscali.com/" title="Search Engine Home Page [new window]" target="_blank">Tiscali</a>',
|
| 874 | | | | 'voila','<a href="http://www.voila.fr/" title="Search Engine Home Page [new window]" target="_blank">Voila</a>',
|
| 875 | | | | 'search.com','<a href="http://www.search.com/" title="Search Engine Home Page [new window]" target="_blank">Search.com</a>',
|
| 876 | | | | 'yahoo_mindset','<a href="http://mindset.research.yahoo.com/" title="Search Engine Home Page [new window]" target="_blank">Yahoo! Mindset</a>',
|
| 877 | | | | 'yahoo','<a href="http://www.yahoo.com/" title="Search Engine Home Page [new window]" target="_blank">Yahoo!</a>',
|
| 878 | | | | 'sympatico','<a href="http://sympatico.msn.ca/" title="Search Engine Home Page [new window]" target="_blank">Sympatico</a>',
|
| 879 | | | | 'excite','<a href="http://www.excite.com/" title="Search Engine Home Page [new window]" target="_blank">Excite</a>',
|
| 880 | | | | # Minor international search engines
|
| 881 | | | | 'google4counter','<a href="http://www.4-counter.com/" title="Search Engine Home Page [new window]" target="_blank">4-counter (Google)</a>',
|
| 882 | | | | 'att','<a href="http://www.att.net/" title="Search Engine Home Page [new window]" target="_blank">AT&T search (powered by Google)</a>',
|
| 883 | | | | 'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php/" title="Search Engine Home Page [new window]" target="_blank">BungeeBones</a>',
|
| 884 | | | | 'go','Go.com',
|
| 885 | | | | 'askde','<a href="http://de.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask Deutschland</a>',
|
| 886 | | | | 'askes','<a href="http://es.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask España</a>', # break out Ask country specific engines.
|
| 887 | | | | 'askfr','<a href="http://fr.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask France</a>',
|
| 888 | | | | 'askit','<a href="http://it.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask Italia</a>',
|
| 889 | | | | 'asknl','<a href="http://nl.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask Nederland</a>',
|
| 890 | | | | 'ask','<a href="http://www.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask</a>',
|
| 891 | | | | 'atomz','Atomz',
|
| 892 | | | | 'dejanews','DejaNews',
|
| 893 | | | | 'euroseek','Euroseek',
|
| 894 | | | | 'findarticles','Find Articles',
|
| 895 | | | | 'go2net','Go2Net (Metamoteur)',
|
| 896 | | | | 'hotbot','Hotbot',
|
| 897 | | | | 'infospace','InfoSpace',
|
| 898 | | | | 'kvasir','Kvasir',
|
| 899 | | | | 'looksmart','Looksmart',
|
| 900 | | | | 'mamma','Mamma',
|
| 901 | | | | 'metacrawler','MetaCrawler (Metamoteur)',
|
| 902 | | | | 'nbci','NBCI',
|
| 903 | | | | 'northernlight','NorthernLight',
|
| 904 | | | | 'overture','Overture', # Replace 'goto\.com','Goto.com',
|
| 905 | | | | 'dogpile','<a href="http://www.dogpile.com/" title="Search Engine Home Page [new window]" target="_blank">Dogpile</a>',
|
| 906 | | | | 'spray','Spray',
|
| 907 | | | | 'teoma','<a href="http://search.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Teoma</a>', # Replace 'directhit\.com','DirectHit',
|
| 908 | | | | 'webcrawler','<a href="http://www.webcrawler.com/" title="Search Engine Home Page [new window]" target="_blank">WebCrawler</a>',
|
| 909 | | | | 'wisenut','WISENut',
|
| 910 | | | | 'ixquick','<a href="http://www.ixquick.com/" title="Search Engine Home Page [new window]" target="_blank">ix quick</a>',
|
| 911 | | | | 'earthlink', 'Earth Link',
|
| 912 | | | | 'iune','<a href="http://www.i-une.com/" title="Search Engine Home Page [new window]" target="_blank">i-une</a>',
|
| 913 | | | | 'blingo','<a href="http://www.blingo.com/" title="Search Engine Home Page [new window]" target="_blank">Blingo</a>',
|
| 914 | | | | 'centraldatabase','<a href="http://search.centraldatabase.org/" title="Search Engine Home Page [new window]" target="_blank">GPU p2p search</a>',
|
| 915 | | | | 'clusty','<a href="http://www.clusty.com/" title="Search Engine Home Page [new window]" target="_blank">Clusty</a>',
|
| 916 | | | | 'mysearch','<a href="http://www.mysearch.com" title="Search Engine Home Page [new window]" target="_blank">My Search</a>',
|
| 917 | | | | 'vivisimo','<a href="http://www.vivisimo.com/" title="Search Engine Home Page [new window]" target="_blank">Vivisimo</a>',
|
| 918 | | | | 'kartoo','<a href="http://www.kartoo.com/" title="Search Engine Home Page [new window]" target="_blank">Kartoo</a>',
|
| 919 | | | | 'icerocket','<a href="http://www.icerocket.com/" title="Search Engine Home Page [new window]" target="_blank">Icerocket (Blog)</a>',
|
| 920 | | | | 'sphere','<a href="http://www.sphere.com/" title="Search Engine Home Page [new window]" target="_blank">Sphere (Blog)</a>',
|
| 921 | | | | 'ledix','<a href="http://www.ledix.net/" title="Search Engine Home Page [new window]" target="_blank">Ledix</a>',
|
| 922 | | | | 'shawca','<a href="http://start.shaw.ca/" title="Search Engine Home Page [new window]" target="_blank">Shaw.ca</a>',
|
| 923 | | | | 'searchalot','<a href="http://www.searchalot.com/" title="Search Engine Home Page [new window]" target="_blank">Searchalot</a>',
|
| 924 | | | | 'copernic','<a href="http://www.copernic.com/" title="Search Engine Home Page [new window]" target="_blank">Copernic</a>',
|
| 925 | | | | 'avantfind','<a href="http://www.avantfind.com/" title="Search Engine Home Page [new window]" target="_blank">Avantfind</a>',
|
| 926 | | | | 'steadysearch','<a href="http://www.steadysearch.com/" title="Search Engine Home Page [new window]" target="_blank">Steadysearch</a>',
|
| 927 | | | | # Chello Portals
|
| 928 | | | | 'chelloat','<a href="http://www.chello.at/" title="Search Engine Home Page [new window]" target="_blank">Chello Austria</a>',
|
| 929 | | | | 'chellobe','<a href="http://www.chello.be/" title="Search Engine Home Page [new window]" target="_blank">Chello Belgium</a>',
|
| 930 | | | | 'chellocz','<a href="http://www.chello.cz/" title="Search Engine Home Page [new window]" target="_blank">Chello Czech Republic</a>',
|
| 931 | | | | 'chellofr','<a href="http://www.chello.fr/" title="Search Engine Home Page [new window]" target="_blank">Chello France</a>',
|
| 932 | | | | 'chellohu','<a href="http://www.chello.hu/" title="Search Engine Home Page [new window]" target="_blank">Chello Hungary</a>',
|
| 933 | | | | 'chellonl','<a href="http://www.chello.nl/" title="Search Engine Home Page [new window]" target="_blank">Chello Netherlands</a>',
|
| 934 | | | | 'chellono','<a href="http://www.chello.no/" title="Search Engine Home Page [new window]" target="_blank">Chello Norway</a>',
|
| 935 | | | | 'chellopl','<a href="http://www.chello.pl/" title="Search Engine Home Page [new window]" target="_blank">Chello Poland</a>',
|
| 936 | | | | 'chellose','<a href="http://www.chello.se/" title="Search Engine Home Page [new window]" target="_blank">Chello Sweden</a>',
|
| 937 | | | | 'chellosk','<a href="http://www.chello.sk/" title="Search Engine Home Page [new window]" target="_blank">Chello Slovakia</a>',
|
| 938 | | | | 'chellocom','<a href="http://www.chello.com/" title="Search Engine Home Page [new window]" target="_blank">Chello (Country not recognized)</a>',
|
| 939 | | | | # Mirago
|
| 940 | | | | 'miragobe','<a href="http://www.mirago.be/" title="Search Engine Home Page [new window]" target="_blank">Mirago Belgium</a>',
|
| 941 | | | | 'miragoch','<a href="http://www.mirago.ch/" title="Search Engine Home Page [new window]" target="_blank">Mirago Switzerland</a>',
|
| 942 | | | | 'miragode','<a href="http://www.mirago.de/" title="Search Engine Home Page [new window]" target="_blank">Mirago Germany</a>',
|
| 943 | | | | 'miragodk','<a href="http://www.mirago.dk/" title="Search Engine Home Page [new window]" target="_blank">Mirago Denmark</a>',
|
| 944 | | | | 'miragoes','<a href="http://es.mirago.com/" title="Search Engine Home Page [new window]" target="_blank">Mirago Spain</a>',
|
| 945 | | | | 'miragofr','<a href="http://www.mirago.fr/" title="Search Engine Home Page [new window]" target="_blank">Mirago France</a>',
|
| 946 | | | | 'miragoit','<a href="http://www.mirago.it/" title="Search Engine Home Page [new window]" target="_blank">Mirago Italy</a>',
|
| 947 | | | | 'miragonl','<a href="http://www.mirago.nl/" title="Search Engine Home Page [new window]" target="_blank">Mirago Netherlands</a>',
|
| 948 | | | | 'miragono','<a href="http://no.mirago.com/" title="Search Engine Home Page [new window]" target="_blank">Mirago Norway</a>',
|
| 949 | | | | 'miragose','<a href="http://www.mirago.se/" title="Search Engine Home Page [new window]" target="_blank">Mirago Sweden</a>',
|
| 950 | | | | 'miragocouk','<a href="http://zone.mirago.co.uk/" title="Search Engine Home Page [new window]" target="_blank">Mirago UK</a>',
|
| 951 | | | | 'mirago','<a href="http://www.mirago.com/" title="Search Engine Home Page [new window]" target="_blank">Mirago (country unknown)</a>',
|
| 952 | | | | 'answerbus','<a href="http://www.answerbus.com/" title="Search Engine Home Page [new window]" target="_blank">Answerbus</a>',
|
| 953 | | | | 'icq','<a href="http://www.icq.com/" title="Search Engine Home Page [new window]" target="_blank">icq</a>',
|
| 954 | | | | 'nusearch','<a href="http://www.nusearch.com/" title="Search Engine Home Page [new window]" target="_blank">Nusearch</a>',
|
| 955 | | | | 'goodsearch','<a href="http://www.goodsearch.com/" title="Search Engine Home Page [new window]" target="_blank">GoodSearch</a>',
|
| 956 | | | | 'scroogle','<a href="http://www.scroogle.org/" title="Search Engine Home Page [new window]" target="_blank">Scroogle</a>',
|
| 957 | | | | 'questionanswering','<a href="http://www.questionanswering.com/" title="Questionanswering home page [new window]" target="_blank">Questionanswering</a>',
|
| 958 | | | | 'mywebsearch','<a href="http://search.mywebsearch.com/" title="MyWebSearch home page [new window]" target="_blank">MyWebSearch</a>',
|
| 959 | | | | 'comettoolbar','<a href="http://as.starware.com/dp/search" title="Comet toolbar search home page [new window]" target="_blank">Comet toolbar search</a>',
|
| 960 | | | | # Social Bookmarking Services
|
| 961 | | | | 'delicious','<a href="http://del.icio.us/" title="del.icio.us home page [new window]" target="_blank">del.icio.us</a> (Social Bookmark)',
|
| 962 | | | | 'digg','<a href="http://www.digg.com/" title="Digg home page [new window]" target="_blank">Digg</a> (Social Bookmark)',
|
| 963 | | | | 'stumbleupon','<a href="http://www.stumbleupon.com/" title="Stumbleupon home page [new window]" target="_blank">Stumbleupon</a> (Social Bookmark)',
|
| 964 | | | | 'swik','<a href="http://swik.net/" title="Swik home page [new window]" target="_blank">Swik</a> (Social Bookmark)',
|
| 965 | | | | 'segnalo','<a href="http://segnalo.alice.it/" title="Segnalo home page [new window]" target="_blank">Segnalo</a> (Social Bookmark)',
|
| 966 | | | | 'ineffabile','<a href="http://www.ineffabile.it/" title="Ineffabile.it home page [new window]" target="_blank">Ineffabile.it</a> (Social Bookmark)',
|
| 967 | | | | # Minor Australian search engines
|
| 968 | | | | 'anzwers','<a href="http://anzwers.com.au/" title="anzwers.com.au home page [new window]" target="_blank">anzwers.com.au</a>',
|
| 969 | | | | # Minor brazilian search engines
|
| 970 | | | | 'engine','Cade', 'miner','Meta Miner',
|
| 971 | | | | # Minor chinese search engines
|
| 972 | | | | 'baidu','Baidu', 'sina','Sina', 'sohu','Sohu', 'sogou','<a href="http://www.sogou.com/" title="Search Engine Home Page [new window]" target="_blank">Sogou</a>',
|
| 973 | | | | # Minor czech search engines
|
| 974 | | | | 'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz',
|
| 975 | | | | # Minor danish search-engines
|
| 976 | | | | 'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir',
|
| 977 | | | | # Minor dutch search engines
|
| 978 | | | | 'ilse','Ilse','vindex','Vindex\.nl',
|
| 979 | | | | # Minor english search engines
|
| 980 | | | | 'askuk','<a href="http://uk.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask UK</a>',
|
| 981 | | | | 'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK',
|
| 982 | | | | 'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
|
| 983 | | | | # Minor finnish search engines
|
| 984 | | | | 'haku','Ihmemaa',
|
| 985 | | | | # Minor french search engines
|
| 986 | | | | 'aolfr','AOL (fr)', 'ctrouve','C\'est trouvé', 'francite','Francité', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet',
|
| 987 | | | | 'toile', 'Toile du Québec',
|
| 988 | | | | 'biglotron','<a href="http://www.biglotron.com/" title="Search Engine Home Page [new window]" target="_blank">Biglotron</a>',
|
| 989 | | | | 'mozbot','<a href="http://www.mozbot.fr/" title="Search Engine Home Page [new window]" target="_blank">Mozbot</a>',
|
| 990 | | | | # Minor German search engines
|
| 991 | | | | 'aolde','AOL (de)',
|
| 992 | | | | 'fireball','Fireball', 'infoseek','Infoseek', 'webde','Web.de',
|
| 993 | | | | 'abacho','Abacho', 't-online','T-Online',
|
| 994 | | | | 'allesklar','allesklar.de', 'meinestadt','meinestadt.de',
|
| 995 | | | | 'metaspinner','metaspinner',
|
| 996 | | | | 'metacrawler_de','metacrawler.de',
|
| 997 | | | | 'wwweasel','<a href="http://wwweasel.de/" title="Search Engine Home Page [new window]" target="_blank">WWWeasel</a>',
|
| 998 | | | | 'netluchs','<a href="http://www.netluchs.de/" title="Search Engine Home Page [new window]" target="_blank">Netluchs</a>',
|
| 999 | | | | 'schoenerbrausen','<a href="http://www.schoenerbrausen.de/" title="Search Engine Home Page [new window]" target="_blank">Schoenerbrausen</a>',
|
| 1000 | | | | # Minor hungarian search engines
|
| 1001 | | | | 'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereső', 'goliat','Góliát', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereső',
|
| 1002 | | | | 'tango_hu','<a href="http://tango.hu/" title="Search Engine Home Page [new window]" target="_blank">Tango</a>',
|
| 1003 | | | | 'keresolap_hu','<a href="http://keresolap.hu/" title="Search Engine Home Page [new window]" target="_blank">Tango keresolap</a>',
|
| 1004 | | | | 'polymeta_hu','<a href="http://www.polymeta.hu/" title="Search Engine Home Page [new window]" target="_blank">Polymeta</a>',
|
| 1005 | | | | # Minor Indian search engines
|
| 1006 | | | | 'sify','<a href="http://search.sify.com/" title="Search Engine Home Page [new window]" target="_blank">Sify</a>',
|
| 1007 | | | | # Minor Italian search engines
|
| 1008 | | | | 'virgilio','<a href="http://www.virgilio.it/" title="Search Engine Home Page [new window]" target="_blank">Virgilio</a>',
|
| 1009 | | | | 'arianna','<a href="http://arianna.libero.it/" title="Search Engine Home Page [new window]" target="_blank">Arianna</a>',
|
| 1010 | | | | 'supereva','<a href="http://search.supereva.com/" title="Search Engine Home Page [new window]" target="_blank">Supereva</a>',
|
| 1011 | | | | 'kataweb','<a href="http://www.kataweb.it/ricerca/" title="Search Engine Home Page [new window]" target="_blank">Kataweb</a>',
|
| 1012 | | | | 'aliceitmaster','<a href="http://www.alice.it/" title="Search Engine Home Page [new window]" target="_blank">search.alice.it.master</a>',
|
| 1013 | | | | 'aliceit','<a href="http://www.alice.it/" title="Search Engine Home Page [new window]" target="_blank">alice.it</a>',
|
| 1014 | | | | 'gotuneed','<a href="http://www.gotuneed.com/" title="Search Engine Home Page [new window]" target="_blank">got u need</a>',
|
| 1015 | | | | 'godado','Godado.it',
|
| 1016 | | | | 'jumpy\.it','Jumpy.it',
|
| 1017 | | | | 'shinyseek\.it','Shinyseek.it',
|
| 1018 | | | | 'teecnoit','<a href="http://www.teecno.it/" title="Teecno home page [new window]" target="_blank">Teecno</a>',
|
| 1019 | | | | # Minor Japanese search engines
|
| 1020 | | | | 'askjp','<a href="http://www.ask.jp/" title="Search Engine Home Page [new window]" target="_blank">Ask Japan</a>',
|
| 1021 | | | | 'sagool','<a href="http://sagool.jp/" title="Sagool home page [new window]" target="_blank">Sagool</a>',
|
| 1022 | | | | # Minor Norwegian search engines
|
| 1023 | | | | 'start','start.no', 'eniro','<a href="http://www.eniro.no/" title="Search Engine Home Page [new window]" target="_blank">Eniro</a>',
|
| 1024 | | | | # Minor polish search engines
|
| 1025 | | | | 'wp','<a href="http://szukaj.wp.pl/" title="Wirtualna Polska home page [new window]" target="_blank">Wirtualna Polska</a>',
|
| 1026 | | | | 'onetpl','<a href="http://szukaj.onet.pl/" title="Onet.pl home page [new window]" target="_blank">Onet.pl</a>',
|
| 1027 | | | | 'dodajpl','<a href="http://www.dodaj.pl/" title="Dodaj.pl home page [new window]" target="_blank">Dodaj.pl</a>',
|
| 1028 | | | | 'gazetapl','<a href="http://szukaj.gazeta.pl/" title="Gazeta.pl home page [new window]" target="_blank">Gazeta.pl</a>',
|
| 1029 | | | | 'gerypl','<a href="http://szukaj.gery.pl/" title="Gery.pl home page [new window]" target="_blank">Gery.pl</a>',
|
| 1030 | | | | 'hogapl','<a href="http://www.hoga.pl/" title="Hoga.pl home page [new window]" target="_blank">Hoga.pl</a>',
|
| 1031 | | | | 'netsprintpl','<a href="http://www.netsprint.pl/" title="NetSprint.pl home page [new window]" target="_blank">NetSprint.pl</a>',
|
| 1032 | | | | 'interiapl','<a href="http://www.google.interia.pl/" title="Interia.pl home page [new window]" target="_blank">Interia.pl</a>',
|
| 1033 | | | | 'katalogonetpl','<a href="http://katalog.onet.pl/" title="Katalog.Onet.pl home page [new window]" target="_blank">Katalog.Onet.pl</a>',
|
| 1034 | | | | 'o2pl','<a href="http://szukaj2.o2.pl/" title="o2.pl home page [new window]" target="_blank">o2.pl</a>',
|
| 1035 | | | | 'polskapl','<a href="http://szukaj.polska.pl/" title="Polska home page [new window]" target="_blank">Polska</a>',
|
| 1036 | | | | 'szukaczpl','<a href="http://www.szukacz.pl/" title="Szukacz home page [new window]" target="_blank">Szukacz</a>',
|
| 1037 | | | | 'wowpl','<a href="http://szukaj.wow.pl/" title="Wow.pl home page [new window]" target="_blank">Wow.pl</a>',
|
| 1038 | | | | # Minor Russian search engines
|
| 1039 | | | | 'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot',
|
| 1040 | | | | # Minor Swedish search engines
|
| 1041 | | | | 'passagen','Evreka',
|
| 1042 | | | | 'enirose','<a href="http://www.eniro.se/" title="Eniro Sverige home page [new window]" target="_blank">Eniro Sverige</a>',
|
| 1043 | | | | # Minor Slovak search engines
|
| 1044 | | | | 'zoznam','<a href="http://www.zoznam.sk/" title="Zoznam Search Engine home page [new window]" target="_blank">Zoznam</a>',
|
| 1045 | | | | # Minor Portuguese search engines
|
| 1046 | | | | 'sapo','<a href="http://www.sapo.pt/" title="Sapo Search Engine home page [new window]" target="_blank">Sapo</a>',
|
| 1047 | | | | # Minor Swiss search engines
|
| 1048 | | | | 'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch',
|
| 1049 | | | | # Generic search engines
|
| 1050 | | | | 'search','Unknown search engines'
|
| 1051 | | | | );
|
| 1052 | | | |
|
| 1053 | | | |
|
| 1054 | | | | # Sanity check.
|
| 1055 | | | | # Uncomment this code and run "perl search_engines.pm" to check that the file entries are consistent
|
| 1056 | | | | #-----------------------------------------------------------------------------
|
| 1057 | | | | #foreach my $key (@SearchEnginesSearchIDOrder_list1) {
|
| 1058 | | | | # if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");
|
| 1059 | | | | # foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } }
|
| 1060 | | | | # foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } }
|
| 1061 | | | | #} }
|
| 1062 | | | | #foreach my $key (@SearchEnginesSearchIDOrder_list2) {
|
| 1063 | | | | # if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list2 with no value in SearchEnginesHashID");
|
| 1064 | | | | # foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } }
|
| 1065 | | | | # foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } }
|
| 1066 | | | | #} }
|
| 1067 | | | | #foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } }
|
| 1068 | | | | #foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } }
|
| 1069 | | | | #foreach my $key (keys %SearchEnginesKnownUrl) {
|
| 1070 | | | | # my $found=0;
|
| 1071 | | | | # foreach my $key2 (values %SearchEnginesHashID) {
|
| 1072 | | | | # if ($key eq $key2) { $found=1; last; }
|
| 1073 | | | | # }
|
| 1074 | | | | # if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; }
|
| 1075 | | | | #}
|
| 1076 | | | | #foreach my $key (keys %SearchEnginesHashLib) {
|
| 1077 | | | | # my $found=0;
|
| 1078 | | | | # foreach my $key2 (values %SearchEnginesHashID) {
|
| 1079 | | | | # if ($key eq $key2) { $found=1; last; }
|
| 1080 | | | | # }
|
| 1081 | | | | # if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; }
|
| 1082 | | | | #}
|
| 1083 | | | | #print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
|
| 1084 | | | |
|
| 1085 | 1 | 0.00032 | 0.00032 | 1; # A file loaded with require/use must end by returning a true value.
|