做网站统计首先要有数据,数据从何而来?这需要网站要记录客户所访问的轨迹,记录用户访问每个页面的流向,给网站加过百度统计与cnzz的站长们估计都清楚,要想对某个页面进行统计,就要在该页面上加上统计代码,将用户的访问数据记录下来,进而生成统计数据。现在就来自己实现一个这样的数据采集与统计功能,具体步骤与相关代码如下:
一、设计表结构
先考虑数据的记录方式与保存位置,由于后期要用到这些数据,因此考虑将数据存储到数据库中。根据需要创建了三个表:Visitor用于记录来访用户的相关数据;VisitorRecord用于记录来访用户的访问时间、着陆页面与跳出页面;ViewRecord是访问记录表,用于记录每次页面访问的基础数据。表结构如下:
二、插入数据
表创建好了,如何向其中插入数据呢?既然ViewRecord是记录基础数据,毫无疑问用户每访问一次页面,就要向该表中插入一条数据;每出现一个新的访问者,就向Visitor表中插入一条数据。现在问题来了,我们如何判断该用户是否来过呢?这里我们定义一台PC端为一个用户,如果表Visitor中已存在该用户的数据,只需更新表中的访问次数visitingNum,而不需要再向Visitor表中添加数据。至于应该如何判断,这里借助插件fingerprint2.js来帮助识别是否为同一用户,其作用类似于指纹识别,插件请移步至下载频道下载(下载地址:指纹识别插件 fingerprint2.js)。另外,表VisitorRecord用来存储用户的来访时间、跳出时间及访问时长。
至于各个表插入数据的方法这里就不再赘述了,相信对看到此文的读者来说都不在话下,这里只阐述具体的调用与实现。功能是放到一般处理程序中实现的,读者可自由变通。废话不多说,上代码:
Stat.ashx
/// <summary>
/// Generic handler (Stat.ashx) that receives tracking pings from the page script.
/// A request carrying <c>InPage</c> records a page view (and, when needed, the
/// visitor and a new visit); a request carrying <c>OutPage</c> stamps the exit
/// time and exit page on the visitor's current visit record.
/// Requests from local IPs and from search-engine crawlers are ignored.
/// </summary>
public class Stat : IHttpHandler
{
    /// <summary>Name of the cookie that carries the visitor GUID.</summary>
    private const string VisitorCookieName = "lbGUID";

    /// <summary>Idle time after which the next page view starts a new visit.</summary>
    private const double VisitTimeoutMinutes = 5;

    private const string Utf8Name = "UTF-8";
    private const string Gb2312Name = "GB2312";

    /// <summary>Search-engine markers, index-aligned with <see cref="KeywordParameters"/>.</summary>
    private static readonly string[] KeywordEngines = { "google", "yahoo", "baidu", "soso", "bing", "sogou", "so.com" };

    /// <summary>Query parameters that carry the search keyword ('|' separates alternatives).</summary>
    private static readonly string[] KeywordParameters = { "q", "q", "wd|word|kw|keyword", "w", "q", "query", "q" };

    /// <summary>Splits a URL into name/value query pairs.</summary>
    private static readonly Regex QueryPairPattern = new Regex("[?&]([^=]+)(?:=([^&]*))?");

    /// <summary>UTF-8 decoder that throws on invalid byte sequences instead of substituting U+FFFD.</summary>
    private static readonly Encoding StrictUtf8 = new UTF8Encoding(false, true);

    /// <summary>
    /// Entry point: filters out local and crawler traffic, then handles the
    /// page-enter and/or page-exit part of the request.
    /// </summary>
    /// <param name="context">Current HTTP context.</param>
    public void ProcessRequest(HttpContext context)
    {
        string ip = Labbase.Common.Utils.GetIP();

        Labbase.BLL.TJ_LocalIP localIpBll = new Labbase.BLL.TJ_LocalIP();
        if (localIpBll.Exists("LIP= '" + EscapeSqlLiteral(ip) + "'"))
        {
            return;
        }

        if (isEngine(ip, context.Request.ServerVariables["HTTP_USER_AGENT"]))
        {
            return;
        }

        if (!string.IsNullOrEmpty(context.Request["InPage"]))
        {
            RecordPageView(context, ip);
        }

        if (!string.IsNullOrEmpty(context.Request["OutPage"]))
        {
            RecordPageExit(context);
        }

        context.Response.ContentType = "text/plain";
        context.Response.Write("");
    }

    /// <summary>
    /// Handles an <c>InPage</c> ping. Without a visitor cookie it only issues the
    /// cookie (the page script then repeats the request); otherwise it stores the
    /// visitor/visit bookkeeping and one ViewRecord row.
    /// </summary>
    private void RecordPageView(HttpContext context, string ip)
    {
        HttpRequest request = context.Request;
        HttpCookie visitorCookie = request.Cookies[VisitorCookieName];
        if (visitorCookie == null)
        {
            HttpCookie issued = new HttpCookie(VisitorCookieName);
            issued.Value = Guid.NewGuid().ToString();
            issued.Expires = DateTime.MaxValue;
            context.Response.Cookies.Add(issued);
            return;
        }

        Guid visitorId;
        if (!TryParseGuid(visitorCookie.Value, out visitorId))
        {
            return;
        }

        // Resolve the tracked page before writing anything, so a request without
        // a usable page URL cannot leave half-written rows behind.
        Uri pageUri = ResolvePageUri(request, "InPage");
        if (pageUri == null)
        {
            return;
        }

        string pageUrl = pageUri.ToString();
        string location = IPShowAddress(ip); // one IP-database lookup, reused below

        Labbase.BLL.TJ_Visitor visitorBll = new Labbase.BLL.TJ_Visitor();
        Labbase.BLL.MVisitorRecord visitBll = new Labbase.BLL.MVisitorRecord();
        Labbase.Model.TJ_VisitorRecord visit = new Labbase.Model.TJ_VisitorRecord();

        if (!visitorBll.Exists(visitorId))
        {
            Labbase.Model.TJ_Visitor newVisitor = new Labbase.Model.TJ_Visitor();
            newVisitor.visitingNum = 1;
            newVisitor.VisitorID = visitorId;
            newVisitor.Bfingerprinting = request["fingerprint"];
            newVisitor.LastVisitingTime = DateTime.Now;
            newVisitor.VIp = ip;
            newVisitor.Loction = location;
            visitorBll.Add(newVisitor);

            StartVisit(visitBll, visit, visitorId, pageUrl);
        }
        else
        {
            Labbase.Model.TJ_Visitor visitor = visitorBll.GetModel(visitorId);

            // TotalMinutes, not Minutes: Minutes is only the 0-59 component, so a
            // gap of e.g. 1h02m used to be treated as the same visit.
            bool timedOut = (DateTime.Now - (DateTime)visitor.LastVisitingTime).TotalMinutes > VisitTimeoutMinutes;

            bool continuesVisit = false;
            if (!timedOut)
            {
                Guid openVisitId;
                if (TryParseGuid(visitBll.GetVRIDByGUID(visitorId), out openVisitId))
                {
                    visit.VRID = openVisitId;
                    continuesVisit = true;
                }
            }

            if (!continuesVisit)
            {
                // Either the previous visit timed out or no visit record could be
                // found; start a new one so the page view never gets an empty VRID.
                StartVisit(visitBll, visit, visitorId, pageUrl);

                // NOTE(review): the original never incremented visitingNum for a
                // returning visitor; confirm nothing on the database side does it.
                visitor.visitingNum = visitor.visitingNum + 1;
            }

            visitor.LastVisitingTime = DateTime.Now;
            visitorBll.Update(visitor);
        }

        Labbase.Model.ViewRecord view = new Labbase.Model.ViewRecord();
        view.VRID = visit.VRID;
        view.ViewID = Guid.NewGuid();
        view.referenceUrl = HttpUtility.UrlDecode(request["referrer"]);
        if (Utils.IsNullOrEmpty(view.referenceUrl))
        {
            view.referenceUrl = "";
        }
        view.FullPagePath = HttpUtility.UrlDecode(request["InPage"]);
        view.ViewIP = ip;
        view.Localarea = location;
        view.Vtitle = HttpUtility.UrlDecode(request["title"]);
        view.ViewTime = DateTime.Now;
        view.SId = ResolveSearchEngineId(view.referenceUrl);

        int companyId = ClassifyPage(pageUri, view);

        if (!Utils.IsNullOrEmpty(view.referenceUrl))
        {
            RecordSearchKeyword(view, ip, companyId);
        }

        new Labbase.BLL.MViewRecord().Add(view);
    }

    /// <summary>
    /// Handles an <c>OutPage</c> ping: updates exit time and exit page of the
    /// visit record returned by GetVRIDByGUID for the visitor in the cookie.
    /// </summary>
    private void RecordPageExit(HttpContext context)
    {
        HttpRequest request = context.Request;
        HttpCookie visitorCookie = request.Cookies[VisitorCookieName];
        if (visitorCookie == null)
        {
            return;
        }

        Guid visitorId;
        if (!TryParseGuid(visitorCookie.Value, out visitorId))
        {
            return;
        }

        if (!new Labbase.BLL.TJ_Visitor().Exists(visitorId))
        {
            return;
        }

        Labbase.BLL.MVisitorRecord visitBll = new Labbase.BLL.MVisitorRecord();
        Guid visitId;
        if (!TryParseGuid(visitBll.GetVRIDByGUID(visitorId), out visitId))
        {
            return;
        }

        Labbase.Model.TJ_VisitorRecord visit = visitBll.GetModel(visitId);
        if (visit == null)
        {
            return;
        }

        visit.outTime = DateTime.Now;
        Uri exitPage = ResolvePageUri(request, "OutPage");
        if (exitPage != null)
        {
            visit.ExitPage = exitPage.ToString();
        }
        visitBll.Update(visit);
    }

    /// <summary>Fills and stores a new visit record whose entrance and exit are the current page.</summary>
    private static void StartVisit(Labbase.BLL.MVisitorRecord visitBll, Labbase.Model.TJ_VisitorRecord visit, Guid visitorId, string pageUrl)
    {
        DateTime now = DateTime.Now;
        visit.VRID = Guid.NewGuid();
        visit.VisitorID = visitorId;
        visit.InTime = now;
        visit.outTime = now.AddSeconds(1);
        visit.Entrance = pageUrl;
        visit.ExitPage = pageUrl;
        visitBll.Add(visit);
    }

    /// <summary>
    /// Returns the URL of the tracked page: the Referer of the ping when present,
    /// otherwise the URL the script reported in <paramref name="parameterName"/>.
    /// </summary>
    /// <returns>The page URI, or null when neither source yields an absolute URL.</returns>
    private static Uri ResolvePageUri(HttpRequest request, string parameterName)
    {
        Uri referrer = request.UrlReferrer;
        if (referrer != null)
        {
            return referrer;
        }

        // The Referer header can be missing (privacy settings, proxies).
        string reported = HttpUtility.UrlDecode(request[parameterName]);
        Uri parsed;
        if (!string.IsNullOrEmpty(reported) && Uri.TryCreate(reported, UriKind.Absolute, out parsed))
        {
            return parsed;
        }
        return null;
    }

    /// <summary>Maps the referring URL to a search-engine id; 0 when it is empty, malformed or unknown.</summary>
    private int ResolveSearchEngineId(string referenceUrl)
    {
        Uri referrer;
        if (Utils.IsNullOrEmpty(referenceUrl) || !Uri.TryCreate(referenceUrl, UriKind.Absolute, out referrer))
        {
            return 0;
        }
        return Enginer(referrer.DnsSafeHost);
    }

    /// <summary>
    /// Derives channel and query parameters of the view record from the page URL.
    /// The page key is the path without leading '/', cut at the first '-' and '.'.
    /// An id that cannot be parsed from the URL is recorded as 0 instead of
    /// failing the whole request.
    /// </summary>
    /// <returns>The company id the page belongs to, or 0 when there is none.</returns>
    private static int ClassifyPage(Uri pageUri, Labbase.Model.ViewRecord view)
    {
        string pageKey = pageUri.AbsolutePath.TrimStart(new char[] { '/' });
        int cut = pageKey.IndexOf('-');
        if (cut >= 0)
        {
            pageKey = pageKey.Substring(0, cut);
        }
        cut = pageKey.IndexOf('.');
        if (cut >= 0)
        {
            pageKey = pageKey.Substring(0, cut);
        }

        int companyId = 0;
        int id;
        switch (pageKey)
        {
            case "IndustryNewsDetial":
                view.Querypar1 = 0;
                if (TryGetPathId(pageUri, true, out id))
                {
                    object owner = new Labbase.BLL.News().GetCompanyID("NewsID=" + id);
                    int ownerId;
                    if (owner != null && TryParseInt(owner.ToString(), out ownerId))
                    {
                        view.Querypar1 = ownerId;
                    }
                }
                view.channel = "技术资料";
                break;

            case "SupplyDetial": // supply detail
                view.Querypar1 = 0;
                if (TryGetPathId(pageUri, true, out id))
                {
                    Labbase.Model.supplyInfo supply = new Labbase.BLL.supply().GetModel(id);
                    if (supply != null)
                    {
                        DataTable classes = new Labbase.BLL.product_category().GetProductClassInfo(" ClassXXID=" + supply.ClassXXID);
                        view.Querypar1 = supply.CompanyID;
                        TryParseInt(Convert.ToString(supply.CompanyID), out companyId);
                        if (classes.Rows.Count > 0)
                        {
                            int classId;
                            if (TryParseInt(Convert.ToString(classes.Rows[0]["ClassID"]), out classId))
                            {
                                view.QueryPar2 = classId;
                            }
                            if (TryParseInt(Convert.ToString(classes.Rows[0]["ClassXID"]), out classId))
                            {
                                view.QueryPar3 = classId;
                            }
                        }
                        view.QueryPar4 = supply.ClassXXID;
                    }
                }
                view.channel = "供求信息";
                break;

            case "ProductLDetail": // product detail
                if (TryGetPathId(pageUri, true, out id))
                {
                    companyId = new Labbase.BLL.product().GetCompanyIDByProductID(id);
                }
                view.Querypar1 = companyId;
                view.channel = "产品信息";
                break;

            case "CompanyIndex":    // company home page
            case "CompanyNewsList": // company technical articles
            case "CompanyContact":
            case "CompanyInfo":
                TryGetPathId(pageUri, true, out id);
                view.Querypar1 = id;
                companyId = id;
                view.channel = "公司主页";
                break;

            case "CompanyProduct":      // company supply list
            case "CompanyProdutDetail": // company supply detail
            case "CompanyNews":         // company technical article detail
                // Here the company id is the first '-' separated part after the page name.
                TryGetPathId(pageUri, false, out id);
                view.Querypar1 = id;
                companyId = id;
                view.channel = "公司主页";
                break;

            case "NewsLDetails":
            case "PrimeList":
                view.Querypar1 = 0;
                view.channel = "资讯";
                break;

            case "IndustryNewsList":
                view.Querypar1 = 0;
                view.channel = "技术资料";
                break;

            case "SupplyList":
            case "ProClass":
                view.Querypar1 = 0;
                view.channel = "供求信息";
                break;

            case "ProductBList":
                view.Querypar1 = 0;
                view.channel = "产品信息";
                break;

            case "BrandsList":
                view.Querypar1 = 0;
                view.channel = "品牌专区";
                break;

            case "CompanyList":
                view.Querypar1 = 0;
                view.channel = "公司库";
                break;

            case "Exhibition":
            case "Exhibition/Detail":
                view.Querypar1 = 0;
                view.channel = "展会信息";
                break;

            case "ProductSearch":
            case "SupplySearch":
            case "NewsSearch":
            case "CompanySearch":
            case "BrandSearch":
                view.Querypar1 = 0;
                view.channel = "搜索";
                break;

            case "purchase":
            case "AboutmUs":
            case "ContactmUs":
            default:
                view.Querypar1 = 0;
                view.channel = "其它";
                break;
        }

        return companyId;
    }

    /// <summary>
    /// Extracts the search keyword from the referring URL and either inserts a new
    /// SearchWord row or bumps the counter of the existing one; links it to the view.
    /// </summary>
    private void RecordSearchKeyword(Labbase.Model.ViewRecord view, string ip, int companyId)
    {
        string keyword = GetKeyWord(view.referenceUrl);
        if (string.IsNullOrEmpty(keyword))
        {
            return;
        }

        Labbase.BLL.SearchWord wordBll = new Labbase.BLL.SearchWord();

        // The keyword comes straight from the referrer URL, i.e. it is untrusted input.
        object existingId = wordBll.GetSearchWordId("Word='" + EscapeSqlLiteral(keyword) + "'");
        if (existingId == null)
        {
            Labbase.Model.SearchWord created = new Labbase.Model.SearchWord();
            created.SWID = Guid.NewGuid();
            created.LastDate = DateTime.Now;
            created.SumNum = 1;
            created.UserIP = ip;
            created.Word = keyword;
            created.CompanyID = companyId;
            wordBll.Add(created);
            view.SWId = created.SWID;
            return;
        }

        Guid wordId = new Guid(existingId.ToString());
        view.SWId = wordId;
        Labbase.Model.SearchWord existing = wordBll.GetModel(wordId);
        if (existing != null)
        {
            existing.SumNum = existing.SumNum + 1;
            existing.LastDate = DateTime.Now;
            existing.CompanyID = companyId;
            wordBll.Update(existing);
        }
    }

    // Not referenced by the code in this class; kept so the class surface is unchanged.
    private class Engine_wrod
    {
        public bool isEngine { get; set; }
        public string keyWord { get; set; }
    }

    /// <summary>
    /// Looks up the search engine whose configured domain (SDomin) occurs in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Host name of the referring URL.</param>
    /// <returns>The SID of the first matching engine, or 0 when none matches.</returns>
    private int Enginer(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return 0;
        }

        Labbase.BLL.SearchEngine bll = new Labbase.BLL.SearchEngine();
        foreach (DataRow row in bll.GetList("").Tables[0].Rows)
        {
            string domain = Convert.ToString(row["SDomin"]);

            // An empty domain would match every host (Contains("") is always true).
            if (domain.Length > 0 && input.Contains(domain))
            {
                return int.Parse(row["SID"].ToString());
            }
        }
        return 0;
    }

    /// <summary>
    /// Decides whether the request comes from a search-engine crawler.
    /// </summary>
    /// <param name="IP">Client IP address.</param>
    /// <param name="useragent">User-Agent header; a missing one counts as a crawler.</param>
    /// <returns>True when the user agent contains a configured marker or the IP is a known crawler IP.</returns>
    private bool isEngine(string IP, string useragent)
    {
        // No user agent at all: treat as a crawler and do not count the request.
        if (string.IsNullOrEmpty(useragent))
        {
            return true;
        }

        Labbase.BLL.SerchEngineMark markBll = new Labbase.BLL.SerchEngineMark();
        foreach (DataRow row in markBll.GetList("").Tables[0].Rows)
        {
            string marker = Convert.ToString(row["SEMString"]);
            if (marker.Length > 0 && useragent.Contains(marker))
            {
                return true;
            }
        }

        Labbase.BLL.SearchEngineIP crawlerIpBll = new Labbase.BLL.SearchEngineIP();
        return crawlerIpBll.Exists("SEIP='" + EscapeSqlLiteral(IP) + "'");
    }

    /// <summary>
    /// Extracts the decoded search keyword from a referring search-engine URL.
    /// </summary>
    /// <param name="url">Referring URL, query values still percent-encoded.</param>
    /// <returns>The keyword, or an empty string when the URL is not a known search result page.</returns>
    private string GetKeyWord(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return "";
        }

        // Match the engine marker against the host so that a marker occurring
        // in the path or query of an unrelated site does not count.
        string host = GetHostOrUrl(url);
        string keyword = "";
        for (int i = 0; i < KeywordEngines.Length; i++)
        {
            if (!host.Contains(KeywordEngines[i]))
            {
                continue;
            }

            // Try each alternative parameter name in order until one has a value.
            foreach (string parameter in KeywordParameters[i].Split('|'))
            {
                keyword = GetQuerystring(parameter, url);
                if (!string.IsNullOrEmpty(keyword))
                {
                    break;
                }
            }
            break;
        }

        if (string.IsNullOrEmpty(keyword))
        {
            return "";
        }

        string encodingName = GBorUTF(keyword, url);
        return HttpUtility.UrlDecode(keyword, Encoding.GetEncoding(encodingName));
    }

    /// <summary>
    /// Determines which encoding a percent-encoded keyword uses.
    /// </summary>
    /// <param name="input">Percent-encoded keyword.</param>
    /// <param name="url">Referring URL the keyword was taken from.</param>
    /// <returns>"GB2312" or "UTF-8".</returns>
    private string GBorUTF(string input, string url)
    {
        if (string.IsNullOrEmpty(input) || string.IsNullOrEmpty(url))
        {
            return Utf8Name;
        }

        // Only these engines are checked for GB-encoded keywords.
        string host = GetHostOrUrl(url);
        if (!host.Contains("baidu") && !host.Contains("sogou") && !host.Contains("so.com"))
        {
            return Utf8Name;
        }

        // An explicit "ie" (input encoding) parameter wins.
        string declared = GetQuerystring("ie", url);
        if (!string.IsNullOrEmpty(declared))
        {
            bool isGb = string.Equals(declared, "gb2312", StringComparison.OrdinalIgnoreCase)
                || string.Equals(declared, "gbk", StringComparison.OrdinalIgnoreCase);
            return isGb ? Gb2312Name : Utf8Name;
        }

        // Otherwise sniff: bytes that are not valid UTF-8 are taken to be GB2312.
        // Checking the raw bytes avoids the false GB2312 verdict the former
        // decode/re-encode comparison gave for "%20" versus "+".
        byte[] raw = HttpUtility.UrlDecodeToBytes(input);
        try
        {
            StrictUtf8.GetString(raw);
            return Utf8Name;
        }
        catch (DecoderFallbackException)
        {
            return Gb2312Name;
        }
    }

    /// <summary>
    /// Reads one query parameter from a URL without decoding it.
    /// </summary>
    /// <param name="queryname">Parameter name (wd, word, q, query, w ...).</param>
    /// <param name="url">URL to search.</param>
    /// <returns>The raw value of the first occurrence, or an empty string.</returns>
    private string GetQuerystring(string queryname, string url)
    {
        if (string.IsNullOrEmpty(queryname) || string.IsNullOrEmpty(url))
        {
            return string.Empty;
        }

        // First occurrence wins; a repeated parameter no longer throws.
        foreach (Match pair in QueryPairPattern.Matches(url))
        {
            if (pair.Groups[1].Value == queryname)
            {
                return pair.Groups[2].Value;
            }
        }
        return string.Empty;
    }

    /// <summary>
    /// Reads one parameter from a query string.
    /// </summary>
    /// <param name="strQuery">Query string, with or without the leading '?'.</param>
    /// <param name="strSplit">Parameter name.</param>
    /// <returns>The raw value (may itself contain '='), or an empty string.</returns>
    protected string wordFromUrlQuery(string strQuery, string strSplit)
    {
        if (string.IsNullOrEmpty(strQuery))
        {
            return "";
        }

        string prefix = strSplit + "=";
        foreach (string pair in strQuery.TrimStart('?').Split('&'))
        {
            if (pair.StartsWith(prefix, StringComparison.Ordinal))
            {
                return pair.Substring(prefix.Length);
            }
        }
        return "";
    }

    /// <summary>
    /// Resolves an IP address to a location using the QQWry.Dat data file.
    /// </summary>
    /// <param name="strChar">IP address.</param>
    /// <returns>The location text returned by the IP scanner.</returns>
    protected string IPShowAddress(string strChar)
    {
        IPScaner scanner = new IPScaner();
        scanner.DataPath = System.Web.HttpContext.Current.Server.MapPath(@"/js/QQWry.Dat");
        scanner.IP = strChar;
        return scanner.IPLocation();
    }

    /// <summary>Always false: a new handler instance is created per request.</summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }

    /// <summary>Returns the host of an absolute URL, or the input itself when it cannot be parsed.</summary>
    private static string GetHostOrUrl(string url)
    {
        Uri parsed;
        if (Uri.TryCreate(url, UriKind.Absolute, out parsed) && !string.IsNullOrEmpty(parsed.Host))
        {
            return parsed.Host;
        }
        return url;
    }

    /// <summary>
    /// Parses the numeric id embedded in a page path such as "/Name-12-34.html".
    /// </summary>
    /// <param name="page">Page URL; only the path is inspected, never the query string.</param>
    /// <param name="takeLast">True for the last '-' separated part, false for the first part after the page name.</param>
    /// <param name="id">The parsed id, or 0 on failure.</param>
    private static bool TryGetPathId(Uri page, bool takeLast, out int id)
    {
        id = 0;
        string[] parts = page.AbsolutePath.Split('-');
        if (parts.Length < 2)
        {
            return false;
        }

        string raw = takeLast ? parts[parts.Length - 1] : parts[1];
        int dot = raw.IndexOf('.'); // drop ".html" or any other extension
        if (dot >= 0)
        {
            raw = raw.Substring(0, dot);
        }
        return TryParseInt(raw, out id);
    }

    /// <summary>Culture-invariant integer parse; yields 0 and false on failure.</summary>
    private static bool TryParseInt(string text, out int value)
    {
        return int.TryParse(text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out value);
    }

    /// <summary>Parses a GUID without letting malformed input throw.</summary>
    private static bool TryParseGuid(string text, out Guid value)
    {
        value = Guid.Empty;
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        try
        {
            value = new Guid(text);
            return true;
        }
        catch (FormatException)
        {
            return false;
        }
        catch (OverflowException)
        {
            return false;
        }
    }

    /// <summary>
    /// Doubles single quotes so a value can be embedded in the string-built filter
    /// clauses the BLL methods expect. This is a mitigation only; the BLL should
    /// be moved to parameterized queries.
    /// </summary>
    private static string EscapeSqlLiteral(string value)
    {
        return value == null ? string.Empty : value.Replace("'", "''");
    }
}
三、数据采集
在实现了具体功能之后,我们还需要一个js脚本来调用该一般处理程序Stat.ashx,具体脚本如下:
statistics.js:
// Page-enter tracking: once the DOM is ready, report the page view to Stat.ashx.
// The values are encoded here and decoded again by the handler.
$(function () {
    // encodeURIComponent instead of the deprecated escape(): escape() leaves "+"
    // untouched (the server then decodes it as a space) and emits single-byte
    // %XX for Latin-1 characters, which a UTF-8 URL decoder cannot read.
    var referrerUrl = encodeURIComponent(document.referrer);
    var pageUrl = encodeURIComponent(String(document.location));
    var pageTitle = encodeURIComponent(document.title);

    // The unload handler stores the page URL in a short-lived "freash" cookie;
    // finding the same URL here means the page was just refreshed, so skip it.
    if (getCookie("freash") == String(document.location)) {
        return;
    }

    // Posts one page-view ping. Failures are ignored on purpose: statistics
    // must never interrupt the visitor (the old code popped up an alert).
    function sendPageView(extra, onSuccess) {
        $.ajax({
            type: "POST",
            url: "/tools/Stat.ashx",
            data: $.extend({ InPage: pageUrl, referrer: referrerUrl, title: pageTitle }, extra),
            success: onSuccess
        });
    }

    if (getCookie("lbGUID") == null) {
        // First visit: the first ping only makes the server issue the lbGUID
        // cookie; the second one, sent after it succeeded, carries the fingerprint.
        $.ajax({
            url: "/js/fingerprint2.js",
            dataType: "script",
            cache: true
        }).done(function () {
            var fp = new Fingerprint2();
            fp.get(function (result) {
                sendPageView({}, function () {
                    sendPageView({ fingerprint: result });
                });
            });
        });
    }
    else {
        sendPageView({});
    }
});
// Page-exit tracking: report the exit to Stat.ashx and set a short-lived cookie
// so that an immediate reload of the same page can be recognised as a refresh.
$(window).on("unload", function () {
    var exitPayload = { OutPage: encodeURIComponent(String(document.location)) };
    var queued = false;

    // Current browsers drop synchronous XHR while a page is being dismissed;
    // sendBeacon is the supported way to deliver a last request.
    if (navigator.sendBeacon && window.URLSearchParams) {
        queued = navigator.sendBeacon("/tools/Stat.ashx", new URLSearchParams(exitPayload));
    }

    // Fallback for browsers without sendBeacon (or when the beacon was refused).
    if (!queued) {
        $.ajax({
            type: "POST",
            url: "/tools/Stat.ashx",
            data: exitPayload,
            async: false
        });
    }

    setCookie("freash", document.location);
});
/**
 * Writes a short-lived cookie.
 * @param {string} name   Cookie name.
 * @param {*}      value  Cookie value; converted to a string and escape()d
 *                        (getCookie reverses this with unescape()).
 * @param {number} [ttlMs=2500] Lifetime in milliseconds; defaults to 2.5 seconds.
 */
function setCookie(name, value, ttlMs) {
    var lifetime = typeof ttlMs === "number" ? ttlMs : 2.5 * 1000;
    var expiry = new Date();
    expiry.setTime(expiry.getTime() + lifetime);
    // toUTCString replaces the deprecated toGMTString; both produce the same text.
    document.cookie = name + "=" + escape(value) + ";expires=" + expiry.toUTCString();
}
/**
 * Reads a cookie.
 * @param {string} name Cookie name, matched literally.
 * @returns {?string} The unescape()d value, or null when the cookie is not set.
 */
function getCookie(name) {
    // Escape RegExp metacharacters so a name like "a.b" cannot match "axb".
    var literalName = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var pattern = new RegExp("(^| )" + literalName + "=([^;]*)(;|$)");
    var found = document.cookie.match(pattern);
    if (found) {
        return unescape(found[2]);
    }
    return null;
}
/**
 * Deletes a cookie by rewriting it with an expiry date in the past.
 * Does nothing when the cookie is not set.
 * @param {string} name Cookie name.
 */
function delCookie(name) {
    if (getCookie(name) == null) {
        return;
    }
    var past = new Date();
    past.setTime(past.getTime() - 1);
    // Only the expiry matters for deletion, so an empty value is written instead
    // of echoing the unescaped value back into the cookie string.
    // toUTCString replaces the deprecated toGMTString.
    document.cookie = name + "=;expires=" + past.toUTCString();
}
到此基本的工作已经做完,剩下的就是直接将脚本引用到页面中,用户点击相应的页面自然也就可以实现数据采集与数据统计了。为了避免重复动作,最好将脚本引用到页面共用的用户控件中,下面是经过规范化处理的引用示例:
<script type="text/javascript">
    // Site-internal statistics: append /js/statistics.js to <head> as an async script.
    (function () {
        var tracker = document.createElement("script");
        tracker.type = "text/javascript";
        tracker.async = true;
        tracker.src = "/js/statistics.js";

        var head = document.getElementsByTagName('HEAD').item(0);
        head.appendChild(tracker);
    })();
</script>
总的来说,数据采集无非就是记录下用户在本网站各页面的具体浏览轨迹,从而用于对用户的需求进行分析,采集就是统计的数据来源。